In [1]:
'''
In this file we train SketchNet
'''
import torch
import os
import numpy as np
from torch import optim
from torch.nn import functional as F
from SketchVAE.sketchvae import SketchVAE
from torch import optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
from torch.distributions import Normal
from SketchNet.sketchnet import SketchNet
from utils.helpers import *
import time
###############################
# initial parameters
# --- filesystem locations -------------------------------------------------
s_dir = ""  # folder address (prefix joined onto the data/log paths below)
save_path = "model_backup"  # checkpoints are read from and written to this folder
data_path = [
    "data/irish-dis-measure-vae-train-whole.npy",
    "data/irish-dis-measure-vae-validate-whole.npy",
    "data/irish-dis-measure-vae-test-whole.npy"
]

# --- sizes handed to the SketchNet constructor ----------------------------
zp_dims, zr_dims = 128, 128
pf_dims, pf_num = 512, 2
gen_dims = 1024
combine_dims, combine_head, combine_num = 512, 4, 4
inpaint_len = 4
total_len = 16
seq_len = 16

# --- optimisation settings ------------------------------------------------
batch_size = 32
n_epochs = 15
save_period = 5  # save every 5 epochs
lr = 1e-4
decay = 0.9999

# --- sizes handed to the SketchVAE constructor ----------------------------
vae_hidden_dims = 1024
vae_zp_dims, vae_zr_dims = 128, 128
vae_beta = 0.1
vae_input_dims = 130
vae_pitch_dims = 129
vae_rhythm_dims = 3
vae_beat_num = 4
vae_tick_num = 6
vae_seq_len = vae_tick_num * vae_beat_num  # 6 * 4 = 24 steps per measure
############################



In [2]:
# Warmup schedule
class CustomSchedule:
    """Learning-rate warmup wrapper that drives an optimizer.

    The rate follows the Transformer warmup formula

        d_model ** -0.5 * min(step ** -0.5, step * warmup_steps ** -1.5)

    i.e. it grows linearly for ``warmup_steps`` steps and then decays with
    the inverse square root of the step count.

    Args:
        d_model: model width used to scale the rate.
        warmup_steps: number of linearly increasing steps. A value <= 0
            disables the warmup phase (pure inverse-square-root decay).
        optimizer: object exposing ``param_groups`` (list of dicts with an
            ``'lr'`` entry) and ``step()``. It must be supplied before
            ``step()`` is called; ``rate()`` works without it.
    """

    def __init__(self, d_model, warmup_steps=4000, optimizer=None):
        super(CustomSchedule, self).__init__()

        self.d_model = d_model
        self.optimizer = optimizer
        self.warmup_steps = warmup_steps

        self._step = 0  # number of completed step() calls
        self._rate = 0  # rate applied by the most recent step() call

    def step(self):
        "Update parameters and rate"
        self._step += 1
        rate = self.rate()
        # Write the new rate into every parameter group before stepping.
        for p in self.optimizer.param_groups:
            p['lr'] = rate
        self._rate = rate
        self.optimizer.step()

    def rate(self, step=None):
        """Return the learning rate for ``step`` (defaults to the current step).

        Returns 0.0 for ``step <= 0`` so the schedule can be queried before
        the first ``step()`` call; ``0 ** -0.5`` would otherwise raise
        ``ZeroDivisionError``.
        """
        if step is None:
            step = self._step
        if step <= 0:
            return 0.0
        arg1 = step ** (-0.5)
        if self.warmup_steps <= 0:
            # No warmup phase: ``warmup_steps ** -1.5`` is undefined at 0,
            # so fall back to the decay branch alone.
            return self.d_model ** (-0.5) * arg1
        arg2 = step * (self.warmup_steps ** -1.5)

        return self.d_model ** (-0.5) * min(arg1, arg2)

In [3]:
# input data dis-measure-vae
def processed_data_tensor(data):
    """Turn raw per-item measure records into a ``TensorDataset``.

    Every element of ``data`` is iterated as a sequence of measures and each
    measure is indexed as ``measure[0]`` .. ``measure[3]``. Judging by the
    variable names these are presumably (ground-truth tokens, pitch tokens,
    rhythm tokens, note count) -- TODO confirm against the data-preparation
    code, which is not visible here.

    Processing steps:
      1. Copy the four fields of every measure into nested Python lists.
      2. If the LAST measure of an item has a ground-truth sequence whose
         length differs from ``vae_seq_len``, right-pad that measure's three
         token sequences (128 for the first two, 2 for the rhythm tokens).
      3. Measures whose recorded length is 0 get a placeholder first step
         (60 / 60 / 1) and a length of 1; the number patched is printed.
      4. Build a one-hot rhythm array of shape
         ``(vae_seq_len, vae_rhythm_dims)`` per measure, setting column
         ``token - 1`` (so a token value of 0 selects column -1, the last).

    Returns:
        ``TensorDataset(px, rx, len_x, nrx, gd)`` in exactly that order.
    """
    print("processed data:")
    target_tokens, pitch_tokens, rhythm_tokens, measure_lengths = [], [], [], []

    # Pass 1: copy fields out of the raw records and pad the final measure.
    for item in data:
        item_gd = [list(measure[0]) for measure in item]
        item_px = [list(measure[1]) for measure in item]
        item_rx = [list(measure[2]) for measure in item]
        item_len = [measure[3] for measure in item]
        target_tokens.append(item_gd)
        pitch_tokens.append(item_px)
        rhythm_tokens.append(item_rx)
        measure_lengths.append(item_len)
        if len(item_gd[-1]) != vae_seq_len:
            padding_plan = ((item_gd[-1], 128), (item_px[-1], 128), (item_rx[-1], 2))
            for sequence, pad_value in padding_plan:
                sequence.extend([pad_value] * (vae_seq_len - len(sequence)))

    # Pass 2: give empty measures a single placeholder step.
    empty_measures = 0
    for item_idx, item_len in enumerate(measure_lengths):
        for measure_idx, length in enumerate(item_len):
            if length == 0:
                target_tokens[item_idx][measure_idx][0] = 60
                pitch_tokens[item_idx][measure_idx][0] = 60
                rhythm_tokens[item_idx][measure_idx][0] = 1
                measure_lengths[item_idx][measure_idx] = 1
                empty_measures += 1

    gd_array = np.array(target_tokens)
    px_array = np.array(pitch_tokens)
    rx_array = np.array(rhythm_tokens)
    len_array = np.array(measure_lengths)

    # Pass 3: one-hot encode the rhythm tokens measure by measure.
    rhythm_one_hot = []
    for item_rhythm in rx_array:
        item_one_hot = []
        for measure_rhythm in item_rhythm:
            grid = np.zeros((vae_seq_len, vae_rhythm_dims))
            rows = np.arange(len(measure_rhythm))
            grid[rows, measure_rhythm - 1] = 1
            item_one_hot.append(grid)
        rhythm_one_hot.append(item_one_hot)
    nrx_array = np.array(rhythm_one_hot)

    gd = torch.from_numpy(gd_array).long()
    px = torch.from_numpy(px_array).long()
    rx = torch.from_numpy(rx_array).float()
    len_x = torch.from_numpy(len_array).long()
    nrx = torch.from_numpy(nrx_array).float()
    print("processed finish! zeros:", empty_measures)
    print(gd.size(),px.size(),rx.size(),len_x.size(),nrx.size())
    return TensorDataset(px, rx, len_x, nrx, gd)

# Training split: shuffled mini-batches; the trailing partial batch is dropped.
# NOTE(review): allow_pickle=True unpickles the .npy payload -- only load
# files from a trusted source.
train_set = np.load(os.path.join(s_dir, data_path[0]), allow_pickle=True)
train_loader = DataLoader(
    processed_data_tensor(train_set),
    batch_size=batch_size,
    shuffle=True,
    num_workers=8,
    pin_memory=True,
    drop_last=True,
)

# Validation split: fixed order; the trailing partial batch is also dropped.
validate_set = np.load(os.path.join(s_dir, data_path[1]), allow_pickle=True)
validate_loader = DataLoader(
    processed_data_tensor(validate_set),
    batch_size=batch_size,
    shuffle=False,
    num_workers=8,
    pin_memory=True,
    drop_last=True,
)

# Materialise every validation batch once so the training loop can index
# into them by position.
validate_data = [batch for batch in validate_loader]
print(len(validate_data))

processed data:
processed finish! zeros: 256
torch.Size([39798, 16, 24]) torch.Size([39798, 16, 24]) torch.Size([39798, 16, 24]) torch.Size([39798, 16]) torch.Size([39798, 16, 24, 3])
processed data:
processed finish! zeros: 1
torch.Size([2927, 16, 24]) torch.Size([2927, 16, 24]) torch.Size([2927, 16, 24]) torch.Size([2927, 16]) torch.Size([2927, 16, 24, 3])
91


In [4]:
# load VAE model
vae_model = SketchVAE(
    vae_input_dims, vae_pitch_dims, vae_rhythm_dims, vae_hidden_dims, 
    vae_zp_dims, vae_zr_dims, vae_seq_len, vae_beat_num, vae_tick_num, 4000)
# Deserialize onto the CPU first: without map_location a checkpoint that was
# saved from GPU tensors cannot be loaded on a CPU-only host, which would make
# the "Using: CPU" branch below unreachable in practice. The model is moved to
# the GPU afterwards when one is available.
dic = torch.load(os.path.join(save_path, "sketchvae-param.pt"), map_location="cpu")

# Strip any "module." prefix from the parameter names (presumably left by an
# nn.DataParallel wrapper at save time -- TODO confirm).
for name in list(dic.keys()):
    dic[name.replace('module.', '')] = dic.pop(name)
vae_model.load_state_dict(dic)

if torch.cuda.is_available():
    print('Using: ', torch.cuda.get_device_name(torch.cuda.current_device()))
    vae_model.cuda()
else:
    print('Using: CPU')
# The VAE is only used in evaluation mode from here on.
vae_model.eval()
print(vae_model.training)

Using:  Tesla V100-SXM2-32GB
False


In [5]:
# import model

# TODO: think about training with MSE
model = SketchNet(
    zp_dims, zr_dims, 
    pf_dims, gen_dims, combine_dims,
    pf_num, combine_num, combine_head,
    inpaint_len, total_len, 
    vae_model, True
)
# stage-1 training result
# map_location="cpu" lets a GPU-saved checkpoint load on a CPU-only host;
# load_state_dict copies the values onto whatever device the parameters use.
dic = torch.load(os.path.join(save_path,"sketchnet-stage-1-param.pt"), map_location="cpu")
# Strip any "module." prefix from the parameter names (presumably left by an
# nn.DataParallel wrapper at save time -- TODO confirm).
for name in list(dic.keys()):
    dic[name.replace('module.', '')] = dic.pop(name)
model.load_state_dict(dic)
model.set_stage("sketch")

# Move the model to its final device BEFORE building the optimizer, as the
# torch.optim documentation recommends.
if torch.cuda.is_available():
    print('Using: ', torch.cuda.get_device_name(torch.cuda.current_device()))
    model.cuda()
else:
    print('Using: CPU')

# Only parameters that still require gradients after set_stage() are optimised.
# The lr passed here is overwritten on every CustomSchedule.step() call.
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),lr=lr)
scheduler = CustomSchedule(combine_dims, optimizer=optimizer)
print(model)

Using:  Tesla V100-SXM2-32GB
SketchNet(
  (past_p_gru): GRU(128, 512, num_layers=2, batch_first=True, dropout=0.5, bidirectional=True)
  (past_r_gru): GRU(128, 512, num_layers=2, batch_first=True, dropout=0.5, bidirectional=True)
  (future_p_gru): GRU(128, 512, num_layers=2, batch_first=True, dropout=0.5, bidirectional=True)
  (future_r_gru): GRU(128, 512, num_layers=2, batch_first=True, dropout=0.5, bidirectional=True)
  (gen_p_gru): GRU(128, 1024, num_layers=2, batch_first=True, dropout=0.5, bidirectional=True)
  (gen_r_gru): GRU(128, 1024, num_layers=2, batch_first=True, dropout=0.5, bidirectional=True)
  (gen_p_out): Linear(in_features=2048, out_features=128, bias=True)
  (gen_r_out): Linear(in_features=2048, out_features=128, bias=True)
  (combine_in): Linear(in_features=256, out_features=512, bias=True)
  (combine_posenc): PositionalEncoding()
  (combine_dropout): Dropout(p=0.1, inplace=False)
  (combine_nn): ModuleList(
    (0): CombineLayer(
      (slf_attn): MultiHeadAttention

In [6]:
def process_raw_x(raw_x, n_past, n_inpaint, n_future):
    """Slice a five-field batch into past / inpaint / future windows.

    Args:
        raw_x: sequence of exactly five indexable arrays, unpacked as
            ``(px, rx, len_x, nrx, gd)``. All are sliced along axis 1.
        n_past: number of leading positions that form the "past" window.
        n_inpaint: number of positions, starting at ``n_past``, that form
            the "inpaint" window.
        n_future: START INDEX of the "future" window along axis 1 (despite
            the name it is used as ``[n_future:]``, not as a count).

    Returns:
        A list of 15 slices ordered as the five past fields, then the five
        inpaint fields, then the five future fields; within each group the
        order is ``px, rx, len_x, nrx, gd``.
    """
    raw_px, raw_rx, raw_len_x, raw_nrx, raw_gd = raw_x

    # (array, whether the original indexing carried an explicit third axis)
    fields = (
        (raw_px, True),
        (raw_rx, True),
        (raw_len_x, False),
        (raw_nrx, True),
        (raw_gd, True),
    )
    everything = slice(None)
    windows = (
        slice(None, n_past),                 # past
        slice(n_past, n_past + n_inpaint),   # inpaint
        slice(n_future, None),               # future
    )

    pieces = []
    for window in windows:
        for array, has_third_axis in fields:
            if has_third_axis:
                index = (everything, window, everything)
            else:
                index = (everything, window)
            pieces.append(array[index])
    return pieces
def get_acc(recon, gd):
    """Return the fraction of positions where ``recon`` equals ``gd``.

    Both arguments are moved to the CPU, detached and converted to NumPy
    arrays; the number of element-wise matches is divided by the number of
    elements in ``recon``.
    """
    predicted = recon.cpu().detach().numpy()
    expected = gd.cpu().detach().numpy()
    matches = predicted == expected
    return np.sum(matches) / predicted.size
# stage-2 training
model.set_stage("sketch")
# Use the current CUDA device when one exists, otherwise stay on the CPU.
# torch.cuda.current_device() raises on a CPU-only host, which contradicted
# the CPU fallback offered by the model-loading cells.
if torch.cuda.is_available():
    device = torch.device("cuda", torch.cuda.current_device())
else:
    device = torch.device("cpu")
losses = []   # one [train_loss, validation_loss] pair per epoch
step = 0      # not used by this loop; kept so the name stays defined
n_past = 6    # positions [0, 6) are the past context
n_future = 10  # START index of the future context (positions [10, end))
n_inpaint = 4  # positions [6, 10) are the span to be inpainted
iteration = 0


def _prepare_batch(batch):
    """Slice ``batch`` into past/inpaint/future groups and move them to ``device``.

    Returns ``(past_x, inpaint_x, future_x, target)`` where each ``*_x`` is a
    list of five tensors in ``px, rx, len_x, nrx, gd`` order and ``target`` is
    the flattened ``gd`` tensor of the inpaint group.
    """
    parts = process_raw_x(batch, n_past, n_inpaint, n_future)
    parts = [t.to(device = device, non_blocking = True) for t in parts]
    past, inpaint, future = parts[0:5], parts[5:10], parts[10:15]
    target = inpaint[4].contiguous().view(-1)
    return past, inpaint, future, target


print(vae_model.training)
for epoch in range(n_epochs):
    print("epoch: %d\n__________________________________________" % (epoch), flush = True)
    mean_loss = 0.0
    mean_acc = 0.0
    v_mean_loss = 0.0
    v_mean_acc = 0.0
    total = 0
    for i,tr_data in enumerate(train_loader):
        # Re-enter training mode: the validation pass at the end of the
        # previous iteration switched the model to eval mode.
        model.train()
        # Cycle through the cached validation batches, one per training batch.
        j = i % len(validate_data)
        past_x, inpaint_x, future_x, inpaint_gd_whole = _prepare_batch(tr_data)
        v_past_x, v_inpaint_x, v_future_x, v_inpaint_gd_whole = _prepare_batch(validate_data[j])

        # train
        scheduler.optimizer.zero_grad()
        recon_x, iteration, use_teacher, stage = model(past_x, future_x, inpaint_x)
        logits = recon_x.view(-1, recon_x.size(-1))
        loss = F.cross_entropy(logits, inpaint_gd_whole, reduction = "mean")
        acc = get_acc(logits.argmax(-1), inpaint_gd_whole)
        loss.backward()
        # CustomSchedule.step() sets the learning rate and steps the optimizer.
        scheduler.step()
        total += 1
        mean_loss += loss.item()
        mean_acc += acc

        # validate
        model.eval()
        with torch.no_grad():
            v_recon_x, _,_,_ = model(v_past_x, v_future_x, v_inpaint_x)
            v_logits = v_recon_x.view(-1, v_recon_x.size(-1))
            v_loss = F.cross_entropy(v_logits, v_inpaint_gd_whole, reduction = "mean")
            v_acc = get_acc(v_logits.argmax(-1), v_inpaint_gd_whole)
            v_mean_loss += v_loss.item()
            v_mean_acc += v_acc
        print("batch %d loss: %.5f acc: %.5f | v_loss: %.5f v_acc: %.5f |  iteration: %d teacher: %d stage: %s lr: %f" \
              % (i,loss.item(), acc, v_loss.item(), v_acc, iteration, use_teacher, stage, scheduler.rate()),flush = True)
    mean_loss /= total
    mean_acc /= total
    v_mean_loss /= total
    v_mean_acc /= total
    print("epoch %d loss: %.5f acc: %.5f | v_loss: %.5f v_acc: %.5f "  % (epoch,mean_loss, mean_acc, v_mean_loss, v_mean_acc),flush = True)
    losses.append([mean_loss, v_mean_loss])
    if (epoch + 1) % save_period == 0:
        filename = "sketchnet-" + 'loss_' + str(v_mean_loss) + "_acc_" + str(v_mean_acc) + "_epoch_" +  str(epoch+1) + '_it_' + str(iteration) + ".pt"
        # Checkpoints are written from CPU tensors; afterwards the model goes
        # back to the training device (not unconditionally to CUDA).
        torch.save(model.cpu().state_dict(),os.path.join(save_path,filename))
        model.to(device)
    # Rewrite the loss log after every epoch so progress survives a crash.
    np.save(os.path.join(s_dir, "sketchnet_log.npy"),losses)


False
epoch: 0
__________________________________________
batch 0 loss: 4.49439 acc: 0.64323 | v_loss: 4.13550 v_acc: 0.66992 |  iteration: 1 teacher: 0 stage: sketch lr: 0.000000
batch 1 loss: 3.69742 acc: 0.66829 | v_loss: 4.31203 v_acc: 0.66211 |  iteration: 2 teacher: 1 stage: sketch lr: 0.000000
batch 2 loss: 4.13041 acc: 0.65397 | v_loss: 4.54849 v_acc: 0.63802 |  iteration: 3 teacher: 1 stage: sketch lr: 0.000001
batch 3 loss: 4.00195 acc: 0.66276 | v_loss: 3.99588 v_acc: 0.66764 |  iteration: 4 teacher: 0 stage: sketch lr: 0.000001
batch 4 loss: 4.04750 acc: 0.66374 | v_loss: 4.28190 v_acc: 0.65007 |  iteration: 5 teacher: 0 stage: sketch lr: 0.000001
batch 5 loss: 4.52034 acc: 0.64453 | v_loss: 3.86095 v_acc: 0.66960 |  iteration: 6 teacher: 0 stage: sketch lr: 0.000001
batch 6 loss: 4.10044 acc: 0.66536 | v_loss: 4.13147 v_acc: 0.65755 |  iteration: 7 teacher: 1 stage: sketch lr: 0.000001
batch 7 loss: 4.41390 acc: 0.64095 | v_loss: 4.66618 v_acc: 0.63216 |  iteration: 8 teac

batch 66 loss: 2.02520 acc: 0.65658 | v_loss: 2.51530 v_acc: 0.62370 |  iteration: 67 teacher: 1 stage: sketch lr: 0.000012
batch 67 loss: 2.14753 acc: 0.64779 | v_loss: 2.27943 v_acc: 0.63835 |  iteration: 68 teacher: 0 stage: sketch lr: 0.000012
batch 68 loss: 2.43286 acc: 0.63444 | v_loss: 1.87237 v_acc: 0.67415 |  iteration: 69 teacher: 1 stage: sketch lr: 0.000012
batch 69 loss: 2.10046 acc: 0.65462 | v_loss: 1.91073 v_acc: 0.66927 |  iteration: 70 teacher: 1 stage: sketch lr: 0.000012
batch 70 loss: 1.88043 acc: 0.66862 | v_loss: 1.76721 v_acc: 0.67513 |  iteration: 71 teacher: 0 stage: sketch lr: 0.000012
batch 71 loss: 2.08845 acc: 0.65430 | v_loss: 1.90802 v_acc: 0.66569 |  iteration: 72 teacher: 0 stage: sketch lr: 0.000013
batch 72 loss: 1.87090 acc: 0.66992 | v_loss: 1.90982 v_acc: 0.67350 |  iteration: 73 teacher: 0 stage: sketch lr: 0.000013
batch 73 loss: 1.98974 acc: 0.66211 | v_loss: 1.81301 v_acc: 0.67546 |  iteration: 74 teacher: 1 stage: sketch lr: 0.000013
batch 74

batch 132 loss: 1.68741 acc: 0.69531 | v_loss: 1.73943 v_acc: 0.69173 |  iteration: 133 teacher: 0 stage: sketch lr: 0.000023
batch 133 loss: 1.73576 acc: 0.68685 | v_loss: 1.60544 v_acc: 0.70964 |  iteration: 134 teacher: 1 stage: sketch lr: 0.000023
batch 134 loss: 1.74398 acc: 0.69434 | v_loss: 1.96239 v_acc: 0.67350 |  iteration: 135 teacher: 1 stage: sketch lr: 0.000024
batch 135 loss: 1.69067 acc: 0.69759 | v_loss: 1.74712 v_acc: 0.69889 |  iteration: 136 teacher: 1 stage: sketch lr: 0.000024
batch 136 loss: 1.78759 acc: 0.69303 | v_loss: 1.58373 v_acc: 0.70703 |  iteration: 137 teacher: 1 stage: sketch lr: 0.000024
batch 137 loss: 1.66339 acc: 0.69694 | v_loss: 1.74571 v_acc: 0.69857 |  iteration: 138 teacher: 1 stage: sketch lr: 0.000024
batch 138 loss: 1.65795 acc: 0.69792 | v_loss: 1.66773 v_acc: 0.69141 |  iteration: 139 teacher: 0 stage: sketch lr: 0.000024
batch 139 loss: 1.67570 acc: 0.68913 | v_loss: 1.74753 v_acc: 0.68783 |  iteration: 140 teacher: 0 stage: sketch lr: 0

batch 198 loss: 1.68930 acc: 0.69531 | v_loss: 1.61775 v_acc: 0.70898 |  iteration: 199 teacher: 0 stage: sketch lr: 0.000035
batch 199 loss: 1.67995 acc: 0.70182 | v_loss: 1.53933 v_acc: 0.71159 |  iteration: 200 teacher: 0 stage: sketch lr: 0.000035
batch 200 loss: 1.63530 acc: 0.70150 | v_loss: 1.53781 v_acc: 0.71940 |  iteration: 201 teacher: 0 stage: sketch lr: 0.000035
batch 201 loss: 1.64198 acc: 0.69434 | v_loss: 1.70539 v_acc: 0.69401 |  iteration: 202 teacher: 0 stage: sketch lr: 0.000035
batch 202 loss: 1.66873 acc: 0.70312 | v_loss: 1.55474 v_acc: 0.72591 |  iteration: 203 teacher: 0 stage: sketch lr: 0.000035
batch 203 loss: 1.76989 acc: 0.69010 | v_loss: 1.42286 v_acc: 0.73958 |  iteration: 204 teacher: 0 stage: sketch lr: 0.000036
batch 204 loss: 1.86058 acc: 0.67773 | v_loss: 1.65751 v_acc: 0.70117 |  iteration: 205 teacher: 1 stage: sketch lr: 0.000036
batch 205 loss: 1.69577 acc: 0.69889 | v_loss: 1.71593 v_acc: 0.69206 |  iteration: 206 teacher: 0 stage: sketch lr: 0

batch 264 loss: 1.63861 acc: 0.69596 | v_loss: 1.74020 v_acc: 0.69238 |  iteration: 265 teacher: 0 stage: sketch lr: 0.000046
batch 265 loss: 1.62612 acc: 0.69694 | v_loss: 1.46848 v_acc: 0.73210 |  iteration: 266 teacher: 0 stage: sketch lr: 0.000046
batch 266 loss: 1.63437 acc: 0.70150 | v_loss: 1.67725 v_acc: 0.70573 |  iteration: 267 teacher: 1 stage: sketch lr: 0.000047
batch 267 loss: 1.67626 acc: 0.69629 | v_loss: 1.60498 v_acc: 0.70020 |  iteration: 268 teacher: 1 stage: sketch lr: 0.000047
batch 268 loss: 1.57342 acc: 0.70638 | v_loss: 1.62190 v_acc: 0.69954 |  iteration: 269 teacher: 1 stage: sketch lr: 0.000047
batch 269 loss: 1.63595 acc: 0.69889 | v_loss: 1.54682 v_acc: 0.70638 |  iteration: 270 teacher: 0 stage: sketch lr: 0.000047
batch 270 loss: 1.66021 acc: 0.70312 | v_loss: 1.62042 v_acc: 0.70508 |  iteration: 271 teacher: 0 stage: sketch lr: 0.000047
batch 271 loss: 1.56787 acc: 0.71029 | v_loss: 1.68036 v_acc: 0.68717 |  iteration: 272 teacher: 0 stage: sketch lr: 0

batch 330 loss: 1.51834 acc: 0.71777 | v_loss: 1.60294 v_acc: 0.69661 |  iteration: 331 teacher: 0 stage: sketch lr: 0.000058
batch 331 loss: 1.64155 acc: 0.70280 | v_loss: 1.47392 v_acc: 0.73079 |  iteration: 332 teacher: 0 stage: sketch lr: 0.000058
batch 332 loss: 1.58767 acc: 0.70443 | v_loss: 1.49363 v_acc: 0.71777 |  iteration: 333 teacher: 1 stage: sketch lr: 0.000058
batch 333 loss: 1.67123 acc: 0.69792 | v_loss: 1.45701 v_acc: 0.72656 |  iteration: 334 teacher: 1 stage: sketch lr: 0.000058
batch 334 loss: 1.56857 acc: 0.71159 | v_loss: 1.44871 v_acc: 0.72396 |  iteration: 335 teacher: 1 stage: sketch lr: 0.000059
batch 335 loss: 1.57910 acc: 0.70443 | v_loss: 1.48413 v_acc: 0.70801 |  iteration: 336 teacher: 0 stage: sketch lr: 0.000059
batch 336 loss: 1.55748 acc: 0.71452 | v_loss: 1.60559 v_acc: 0.70280 |  iteration: 337 teacher: 1 stage: sketch lr: 0.000059
batch 337 loss: 1.56971 acc: 0.70833 | v_loss: 1.56528 v_acc: 0.71126 |  iteration: 338 teacher: 1 stage: sketch lr: 0

batch 396 loss: 1.54574 acc: 0.71615 | v_loss: 1.58672 v_acc: 0.70833 |  iteration: 397 teacher: 0 stage: sketch lr: 0.000069
batch 397 loss: 1.58203 acc: 0.70996 | v_loss: 1.53546 v_acc: 0.71029 |  iteration: 398 teacher: 1 stage: sketch lr: 0.000070
batch 398 loss: 1.58683 acc: 0.70736 | v_loss: 1.71783 v_acc: 0.69271 |  iteration: 399 teacher: 1 stage: sketch lr: 0.000070
batch 399 loss: 1.51323 acc: 0.71712 | v_loss: 1.50502 v_acc: 0.72331 |  iteration: 400 teacher: 0 stage: sketch lr: 0.000070
batch 400 loss: 1.52087 acc: 0.72038 | v_loss: 1.71213 v_acc: 0.69629 |  iteration: 401 teacher: 0 stage: sketch lr: 0.000070
batch 401 loss: 1.55464 acc: 0.71322 | v_loss: 1.65235 v_acc: 0.71061 |  iteration: 402 teacher: 1 stage: sketch lr: 0.000070
batch 402 loss: 1.57110 acc: 0.71615 | v_loss: 1.66256 v_acc: 0.69596 |  iteration: 403 teacher: 0 stage: sketch lr: 0.000070
batch 403 loss: 1.55061 acc: 0.72852 | v_loss: 1.54942 v_acc: 0.70671 |  iteration: 404 teacher: 1 stage: sketch lr: 0

batch 462 loss: 1.55026 acc: 0.71257 | v_loss: 1.64819 v_acc: 0.70312 |  iteration: 463 teacher: 0 stage: sketch lr: 0.000081
batch 463 loss: 1.56216 acc: 0.71680 | v_loss: 1.50222 v_acc: 0.71289 |  iteration: 464 teacher: 0 stage: sketch lr: 0.000081
batch 464 loss: 1.49778 acc: 0.72559 | v_loss: 1.59232 v_acc: 0.69792 |  iteration: 465 teacher: 0 stage: sketch lr: 0.000081
batch 465 loss: 1.54475 acc: 0.71582 | v_loss: 1.50472 v_acc: 0.71875 |  iteration: 466 teacher: 1 stage: sketch lr: 0.000081
batch 466 loss: 1.56059 acc: 0.70931 | v_loss: 1.49783 v_acc: 0.71517 |  iteration: 467 teacher: 0 stage: sketch lr: 0.000082
batch 467 loss: 1.49422 acc: 0.72135 | v_loss: 1.36652 v_acc: 0.74251 |  iteration: 468 teacher: 0 stage: sketch lr: 0.000082
batch 468 loss: 1.46991 acc: 0.72331 | v_loss: 1.45367 v_acc: 0.72526 |  iteration: 469 teacher: 0 stage: sketch lr: 0.000082
batch 469 loss: 1.48605 acc: 0.72298 | v_loss: 1.43309 v_acc: 0.74967 |  iteration: 470 teacher: 0 stage: sketch lr: 0

batch 528 loss: 1.50720 acc: 0.72461 | v_loss: 1.44978 v_acc: 0.73242 |  iteration: 529 teacher: 1 stage: sketch lr: 0.000092
batch 529 loss: 1.47537 acc: 0.72754 | v_loss: 1.46517 v_acc: 0.73014 |  iteration: 530 teacher: 0 stage: sketch lr: 0.000093
batch 530 loss: 1.53116 acc: 0.71973 | v_loss: 1.50353 v_acc: 0.72201 |  iteration: 531 teacher: 1 stage: sketch lr: 0.000093
batch 531 loss: 1.52085 acc: 0.71940 | v_loss: 1.43535 v_acc: 0.73600 |  iteration: 532 teacher: 0 stage: sketch lr: 0.000093
batch 532 loss: 1.52997 acc: 0.71191 | v_loss: 1.45963 v_acc: 0.72201 |  iteration: 533 teacher: 0 stage: sketch lr: 0.000093
batch 533 loss: 1.53017 acc: 0.71908 | v_loss: 1.58519 v_acc: 0.70247 |  iteration: 534 teacher: 1 stage: sketch lr: 0.000093
batch 534 loss: 1.54026 acc: 0.71842 | v_loss: 1.46285 v_acc: 0.72331 |  iteration: 535 teacher: 0 stage: sketch lr: 0.000093
batch 535 loss: 1.43393 acc: 0.73275 | v_loss: 1.50159 v_acc: 0.72038 |  iteration: 536 teacher: 0 stage: sketch lr: 0

batch 594 loss: 1.50928 acc: 0.71549 | v_loss: 1.57038 v_acc: 0.70150 |  iteration: 595 teacher: 1 stage: sketch lr: 0.000104
batch 595 loss: 1.44976 acc: 0.73177 | v_loss: 1.44778 v_acc: 0.72689 |  iteration: 596 teacher: 1 stage: sketch lr: 0.000104
batch 596 loss: 1.45988 acc: 0.72721 | v_loss: 1.37728 v_acc: 0.73535 |  iteration: 597 teacher: 0 stage: sketch lr: 0.000104
batch 597 loss: 1.54962 acc: 0.71517 | v_loss: 1.37662 v_acc: 0.74251 |  iteration: 598 teacher: 1 stage: sketch lr: 0.000104
batch 598 loss: 1.49442 acc: 0.71842 | v_loss: 1.47807 v_acc: 0.72103 |  iteration: 599 teacher: 0 stage: sketch lr: 0.000105
batch 599 loss: 1.48420 acc: 0.72038 | v_loss: 1.53339 v_acc: 0.71680 |  iteration: 600 teacher: 1 stage: sketch lr: 0.000105
batch 600 loss: 1.54149 acc: 0.71029 | v_loss: 1.47730 v_acc: 0.72103 |  iteration: 601 teacher: 1 stage: sketch lr: 0.000105
batch 601 loss: 1.54441 acc: 0.71549 | v_loss: 1.42532 v_acc: 0.73372 |  iteration: 602 teacher: 1 stage: sketch lr: 0

batch 660 loss: 1.44326 acc: 0.73047 | v_loss: 1.48271 v_acc: 0.72363 |  iteration: 661 teacher: 0 stage: sketch lr: 0.000115
batch 661 loss: 1.43788 acc: 0.71973 | v_loss: 1.41482 v_acc: 0.73958 |  iteration: 662 teacher: 1 stage: sketch lr: 0.000116
batch 662 loss: 1.47007 acc: 0.72103 | v_loss: 1.33145 v_acc: 0.74935 |  iteration: 663 teacher: 1 stage: sketch lr: 0.000116
batch 663 loss: 1.47638 acc: 0.72721 | v_loss: 1.51307 v_acc: 0.71517 |  iteration: 664 teacher: 1 stage: sketch lr: 0.000116
batch 664 loss: 1.40959 acc: 0.73730 | v_loss: 1.41903 v_acc: 0.73145 |  iteration: 665 teacher: 1 stage: sketch lr: 0.000116
batch 665 loss: 1.47857 acc: 0.72689 | v_loss: 1.52720 v_acc: 0.71940 |  iteration: 666 teacher: 0 stage: sketch lr: 0.000116
batch 666 loss: 1.49212 acc: 0.72917 | v_loss: 1.46043 v_acc: 0.72982 |  iteration: 667 teacher: 1 stage: sketch lr: 0.000117
batch 667 loss: 1.42327 acc: 0.73730 | v_loss: 1.42949 v_acc: 0.73926 |  iteration: 668 teacher: 0 stage: sketch lr: 0

batch 726 loss: 1.54929 acc: 0.72233 | v_loss: 1.51597 v_acc: 0.71680 |  iteration: 727 teacher: 1 stage: sketch lr: 0.000127
batch 727 loss: 1.40732 acc: 0.73503 | v_loss: 1.38174 v_acc: 0.74089 |  iteration: 728 teacher: 0 stage: sketch lr: 0.000127
batch 728 loss: 1.45540 acc: 0.72624 | v_loss: 1.35875 v_acc: 0.74577 |  iteration: 729 teacher: 1 stage: sketch lr: 0.000127
batch 729 loss: 1.42573 acc: 0.72917 | v_loss: 1.38901 v_acc: 0.73503 |  iteration: 730 teacher: 0 stage: sketch lr: 0.000128
batch 730 loss: 1.46322 acc: 0.72559 | v_loss: 1.42159 v_acc: 0.73503 |  iteration: 731 teacher: 0 stage: sketch lr: 0.000128
batch 731 loss: 1.42082 acc: 0.73600 | v_loss: 1.40734 v_acc: 0.73470 |  iteration: 732 teacher: 0 stage: sketch lr: 0.000128
batch 732 loss: 1.39794 acc: 0.73470 | v_loss: 1.28005 v_acc: 0.75521 |  iteration: 733 teacher: 1 stage: sketch lr: 0.000128
batch 733 loss: 1.40868 acc: 0.73145 | v_loss: 1.40997 v_acc: 0.73079 |  iteration: 734 teacher: 0 stage: sketch lr: 0

batch 792 loss: 1.42872 acc: 0.73568 | v_loss: 1.36315 v_acc: 0.74154 |  iteration: 793 teacher: 1 stage: sketch lr: 0.000139
batch 793 loss: 1.35682 acc: 0.73730 | v_loss: 1.27040 v_acc: 0.76953 |  iteration: 794 teacher: 0 stage: sketch lr: 0.000139
batch 794 loss: 1.49559 acc: 0.73242 | v_loss: 1.46783 v_acc: 0.72624 |  iteration: 795 teacher: 1 stage: sketch lr: 0.000139
batch 795 loss: 1.40236 acc: 0.73600 | v_loss: 1.47214 v_acc: 0.72461 |  iteration: 796 teacher: 0 stage: sketch lr: 0.000139
batch 796 loss: 1.34505 acc: 0.74479 | v_loss: 1.29839 v_acc: 0.74902 |  iteration: 797 teacher: 0 stage: sketch lr: 0.000139
batch 797 loss: 1.33052 acc: 0.73958 | v_loss: 1.39921 v_acc: 0.72721 |  iteration: 798 teacher: 0 stage: sketch lr: 0.000139
batch 798 loss: 1.37363 acc: 0.73698 | v_loss: 1.26364 v_acc: 0.75423 |  iteration: 799 teacher: 1 stage: sketch lr: 0.000140
batch 799 loss: 1.40652 acc: 0.73014 | v_loss: 1.36334 v_acc: 0.73079 |  iteration: 800 teacher: 0 stage: sketch lr: 0

batch 858 loss: 1.44350 acc: 0.72884 | v_loss: 1.35116 v_acc: 0.73730 |  iteration: 859 teacher: 1 stage: sketch lr: 0.000150
batch 859 loss: 1.34745 acc: 0.74023 | v_loss: 1.28735 v_acc: 0.74935 |  iteration: 860 teacher: 1 stage: sketch lr: 0.000150
batch 860 loss: 1.28099 acc: 0.75456 | v_loss: 1.33308 v_acc: 0.74154 |  iteration: 861 teacher: 0 stage: sketch lr: 0.000150
batch 861 loss: 1.30212 acc: 0.74479 | v_loss: 1.33077 v_acc: 0.74772 |  iteration: 862 teacher: 0 stage: sketch lr: 0.000151
batch 862 loss: 1.42513 acc: 0.72949 | v_loss: 1.45470 v_acc: 0.72624 |  iteration: 863 teacher: 1 stage: sketch lr: 0.000151
batch 863 loss: 1.31493 acc: 0.74577 | v_loss: 1.36901 v_acc: 0.73861 |  iteration: 864 teacher: 0 stage: sketch lr: 0.000151
batch 864 loss: 1.35673 acc: 0.73861 | v_loss: 1.28302 v_acc: 0.74902 |  iteration: 865 teacher: 0 stage: sketch lr: 0.000151
batch 865 loss: 1.39630 acc: 0.73600 | v_loss: 1.33842 v_acc: 0.74740 |  iteration: 866 teacher: 1 stage: sketch lr: 0

batch 924 loss: 1.34608 acc: 0.73665 | v_loss: 1.22476 v_acc: 0.76628 |  iteration: 925 teacher: 1 stage: sketch lr: 0.000162
batch 925 loss: 1.36061 acc: 0.73796 | v_loss: 1.17773 v_acc: 0.77018 |  iteration: 926 teacher: 1 stage: sketch lr: 0.000162
batch 926 loss: 1.23343 acc: 0.75749 | v_loss: 1.25129 v_acc: 0.75879 |  iteration: 927 teacher: 0 stage: sketch lr: 0.000162
batch 927 loss: 1.36808 acc: 0.73307 | v_loss: 1.25787 v_acc: 0.74186 |  iteration: 928 teacher: 1 stage: sketch lr: 0.000162
batch 928 loss: 1.26567 acc: 0.75358 | v_loss: 1.16303 v_acc: 0.76921 |  iteration: 929 teacher: 1 stage: sketch lr: 0.000162
batch 929 loss: 1.30751 acc: 0.74089 | v_loss: 1.31828 v_acc: 0.74251 |  iteration: 930 teacher: 0 stage: sketch lr: 0.000162
batch 930 loss: 1.29902 acc: 0.74707 | v_loss: 1.19351 v_acc: 0.76172 |  iteration: 931 teacher: 1 stage: sketch lr: 0.000163
batch 931 loss: 1.29431 acc: 0.74479 | v_loss: 1.04584 v_acc: 0.79395 |  iteration: 932 teacher: 1 stage: sketch lr: 0

batch 990 loss: 1.15063 acc: 0.76628 | v_loss: 1.21929 v_acc: 0.75293 |  iteration: 991 teacher: 1 stage: sketch lr: 0.000173
batch 991 loss: 1.19241 acc: 0.76107 | v_loss: 1.17108 v_acc: 0.76823 |  iteration: 992 teacher: 1 stage: sketch lr: 0.000173
batch 992 loss: 1.26816 acc: 0.74642 | v_loss: 1.23739 v_acc: 0.75293 |  iteration: 993 teacher: 1 stage: sketch lr: 0.000173
batch 993 loss: 1.23788 acc: 0.74707 | v_loss: 1.09000 v_acc: 0.77409 |  iteration: 994 teacher: 0 stage: sketch lr: 0.000174
batch 994 loss: 1.35664 acc: 0.73112 | v_loss: 1.28598 v_acc: 0.74023 |  iteration: 995 teacher: 1 stage: sketch lr: 0.000174
batch 995 loss: 1.25996 acc: 0.74642 | v_loss: 1.15309 v_acc: 0.76530 |  iteration: 996 teacher: 0 stage: sketch lr: 0.000174
batch 996 loss: 1.21390 acc: 0.74512 | v_loss: 1.34637 v_acc: 0.73763 |  iteration: 997 teacher: 1 stage: sketch lr: 0.000174
batch 997 loss: 1.28502 acc: 0.74284 | v_loss: 1.19470 v_acc: 0.75293 |  iteration: 998 teacher: 1 stage: sketch lr: 0

batch 1055 loss: 1.00193 acc: 0.78125 | v_loss: 1.14323 v_acc: 0.75879 |  iteration: 1056 teacher: 0 stage: sketch lr: 0.000184
batch 1056 loss: 1.07176 acc: 0.77767 | v_loss: 1.05800 v_acc: 0.78353 |  iteration: 1057 teacher: 1 stage: sketch lr: 0.000185
batch 1057 loss: 1.10359 acc: 0.76888 | v_loss: 1.08459 v_acc: 0.77637 |  iteration: 1058 teacher: 1 stage: sketch lr: 0.000185
batch 1058 loss: 1.07030 acc: 0.77897 | v_loss: 1.15440 v_acc: 0.76139 |  iteration: 1059 teacher: 1 stage: sketch lr: 0.000185
batch 1059 loss: 1.01556 acc: 0.78027 | v_loss: 1.10538 v_acc: 0.77441 |  iteration: 1060 teacher: 1 stage: sketch lr: 0.000185
batch 1060 loss: 1.03447 acc: 0.77539 | v_loss: 1.00245 v_acc: 0.78613 |  iteration: 1061 teacher: 0 stage: sketch lr: 0.000185
batch 1061 loss: 1.12928 acc: 0.75814 | v_loss: 0.95640 v_acc: 0.79297 |  iteration: 1062 teacher: 1 stage: sketch lr: 0.000186
batch 1062 loss: 0.97570 acc: 0.79102 | v_loss: 0.97302 v_acc: 0.78971 |  iteration: 1063 teacher: 0 sta

batch 1120 loss: 0.80281 acc: 0.79492 | v_loss: 1.01819 v_acc: 0.76693 |  iteration: 1121 teacher: 0 stage: sketch lr: 0.000196
batch 1121 loss: 0.95259 acc: 0.77409 | v_loss: 1.02429 v_acc: 0.77279 |  iteration: 1122 teacher: 1 stage: sketch lr: 0.000196
batch 1122 loss: 1.54215 acc: 0.73014 | v_loss: 0.89888 v_acc: 0.78125 |  iteration: 1123 teacher: 1 stage: sketch lr: 0.000196
batch 1123 loss: 1.01845 acc: 0.74902 | v_loss: 0.89118 v_acc: 0.77279 |  iteration: 1124 teacher: 0 stage: sketch lr: 0.000196
batch 1124 loss: 0.84770 acc: 0.79069 | v_loss: 1.18412 v_acc: 0.74121 |  iteration: 1125 teacher: 0 stage: sketch lr: 0.000197
batch 1125 loss: 0.86858 acc: 0.76628 | v_loss: 0.83970 v_acc: 0.79492 |  iteration: 1126 teacher: 1 stage: sketch lr: 0.000197
batch 1126 loss: 0.81323 acc: 0.78841 | v_loss: 1.04557 v_acc: 0.77116 |  iteration: 1127 teacher: 0 stage: sketch lr: 0.000197
batch 1127 loss: 1.08376 acc: 0.74870 | v_loss: 1.03782 v_acc: 0.76953 |  iteration: 1128 teacher: 1 sta

batch 1185 loss: 0.74894 acc: 0.79818 | v_loss: 0.80515 v_acc: 0.80306 |  iteration: 1186 teacher: 1 stage: sketch lr: 0.000207
batch 1186 loss: 0.91563 acc: 0.76888 | v_loss: 0.78444 v_acc: 0.81803 |  iteration: 1187 teacher: 1 stage: sketch lr: 0.000207
batch 1187 loss: 0.75221 acc: 0.80859 | v_loss: 0.76004 v_acc: 0.81543 |  iteration: 1188 teacher: 1 stage: sketch lr: 0.000208
batch 1188 loss: 0.79354 acc: 0.80208 | v_loss: 0.64572 v_acc: 0.83366 |  iteration: 1189 teacher: 1 stage: sketch lr: 0.000208
batch 1189 loss: 0.61516 acc: 0.82975 | v_loss: 0.85882 v_acc: 0.79785 |  iteration: 1190 teacher: 0 stage: sketch lr: 0.000208
batch 1190 loss: 0.67312 acc: 0.81022 | v_loss: 0.78414 v_acc: 0.80176 |  iteration: 1191 teacher: 0 stage: sketch lr: 0.000208
batch 1191 loss: 0.95914 acc: 0.75814 | v_loss: 0.85074 v_acc: 0.79720 |  iteration: 1192 teacher: 1 stage: sketch lr: 0.000208
batch 1192 loss: 0.66393 acc: 0.82227 | v_loss: 1.11629 v_acc: 0.78190 |  iteration: 1193 teacher: 0 sta

batch 6 loss: 0.50044 acc: 0.84570 | v_loss: 0.76585 v_acc: 0.81608 |  iteration: 1250 teacher: 0 stage: sketch lr: 0.000218
batch 7 loss: 0.53877 acc: 0.84570 | v_loss: 0.71218 v_acc: 0.81836 |  iteration: 1251 teacher: 0 stage: sketch lr: 0.000219
batch 8 loss: 0.72752 acc: 0.80078 | v_loss: 0.76889 v_acc: 0.81608 |  iteration: 1252 teacher: 1 stage: sketch lr: 0.000219
batch 9 loss: 0.62822 acc: 0.82780 | v_loss: 0.99423 v_acc: 0.79069 |  iteration: 1253 teacher: 1 stage: sketch lr: 0.000219
batch 10 loss: 0.59582 acc: 0.82975 | v_loss: 0.63576 v_acc: 0.84115 |  iteration: 1254 teacher: 1 stage: sketch lr: 0.000219
batch 11 loss: 0.53713 acc: 0.84635 | v_loss: 0.69759 v_acc: 0.82845 |  iteration: 1255 teacher: 0 stage: sketch lr: 0.000219
batch 12 loss: 0.52773 acc: 0.83984 | v_loss: 0.66473 v_acc: 0.84668 |  iteration: 1256 teacher: 1 stage: sketch lr: 0.000219
batch 13 loss: 0.68889 acc: 0.80436 | v_loss: 0.57604 v_acc: 0.85449 |  iteration: 1257 teacher: 1 stage: sketch lr: 0.000

batch 72 loss: 0.49089 acc: 0.85417 | v_loss: 0.62646 v_acc: 0.83822 |  iteration: 1316 teacher: 0 stage: sketch lr: 0.000230
batch 73 loss: 0.72865 acc: 0.81250 | v_loss: 0.90552 v_acc: 0.81543 |  iteration: 1317 teacher: 1 stage: sketch lr: 0.000230
batch 74 loss: 0.47858 acc: 0.86198 | v_loss: 0.68122 v_acc: 0.82975 |  iteration: 1318 teacher: 0 stage: sketch lr: 0.000230
batch 75 loss: 0.48360 acc: 0.85547 | v_loss: 0.77781 v_acc: 0.82194 |  iteration: 1319 teacher: 0 stage: sketch lr: 0.000230
batch 76 loss: 0.43896 acc: 0.86751 | v_loss: 0.55880 v_acc: 0.85807 |  iteration: 1320 teacher: 0 stage: sketch lr: 0.000231
batch 77 loss: 0.56880 acc: 0.84375 | v_loss: 0.70612 v_acc: 0.83105 |  iteration: 1321 teacher: 1 stage: sketch lr: 0.000231
batch 78 loss: 0.53573 acc: 0.84375 | v_loss: 0.90773 v_acc: 0.79102 |  iteration: 1322 teacher: 0 stage: sketch lr: 0.000231
batch 79 loss: 0.59523 acc: 0.84245 | v_loss: 0.68773 v_acc: 0.84115 |  iteration: 1323 teacher: 1 stage: sketch lr: 0

batch 137 loss: 0.40086 acc: 0.88216 | v_loss: 0.73019 v_acc: 0.83529 |  iteration: 1381 teacher: 0 stage: sketch lr: 0.000241
batch 138 loss: 0.54978 acc: 0.84798 | v_loss: 0.71874 v_acc: 0.82454 |  iteration: 1382 teacher: 1 stage: sketch lr: 0.000241
batch 139 loss: 0.42972 acc: 0.86979 | v_loss: 0.68681 v_acc: 0.83724 |  iteration: 1383 teacher: 0 stage: sketch lr: 0.000242
batch 140 loss: 0.48364 acc: 0.85254 | v_loss: 0.60407 v_acc: 0.85254 |  iteration: 1384 teacher: 1 stage: sketch lr: 0.000242
batch 141 loss: 0.46524 acc: 0.86589 | v_loss: 0.57215 v_acc: 0.85872 |  iteration: 1385 teacher: 0 stage: sketch lr: 0.000242
batch 142 loss: 0.48515 acc: 0.85905 | v_loss: 0.64008 v_acc: 0.84668 |  iteration: 1386 teacher: 0 stage: sketch lr: 0.000242
batch 143 loss: 0.51265 acc: 0.85775 | v_loss: 0.75794 v_acc: 0.81315 |  iteration: 1387 teacher: 1 stage: sketch lr: 0.000242
batch 144 loss: 0.47054 acc: 0.86914 | v_loss: 0.70852 v_acc: 0.84310 |  iteration: 1388 teacher: 0 stage: sket

batch 202 loss: 0.40326 acc: 0.88672 | v_loss: 0.78524 v_acc: 0.82617 |  iteration: 1446 teacher: 0 stage: sketch lr: 0.000253
batch 203 loss: 0.49024 acc: 0.85221 | v_loss: 0.57522 v_acc: 0.85905 |  iteration: 1447 teacher: 1 stage: sketch lr: 0.000253
batch 204 loss: 0.51670 acc: 0.85059 | v_loss: 0.66901 v_acc: 0.83724 |  iteration: 1448 teacher: 0 stage: sketch lr: 0.000253
batch 205 loss: 0.46486 acc: 0.86719 | v_loss: 0.55545 v_acc: 0.84961 |  iteration: 1449 teacher: 1 stage: sketch lr: 0.000253
batch 206 loss: 0.48874 acc: 0.85254 | v_loss: 1.03697 v_acc: 0.79102 |  iteration: 1450 teacher: 1 stage: sketch lr: 0.000253
batch 207 loss: 0.58392 acc: 0.84342 | v_loss: 0.70005 v_acc: 0.84180 |  iteration: 1451 teacher: 1 stage: sketch lr: 0.000253
batch 208 loss: 0.40075 acc: 0.87337 | v_loss: 0.74360 v_acc: 0.83105 |  iteration: 1452 teacher: 0 stage: sketch lr: 0.000254
batch 209 loss: 0.48827 acc: 0.86556 | v_loss: 0.54820 v_acc: 0.85970 |  iteration: 1453 teacher: 0 stage: sket

batch 267 loss: 0.52835 acc: 0.85547 | v_loss: 0.46896 v_acc: 0.88118 |  iteration: 1511 teacher: 1 stage: sketch lr: 0.000264
batch 268 loss: 0.46527 acc: 0.86035 | v_loss: 1.03700 v_acc: 0.80306 |  iteration: 1512 teacher: 1 stage: sketch lr: 0.000264
batch 269 loss: 0.46157 acc: 0.86393 | v_loss: 0.73910 v_acc: 0.83171 |  iteration: 1513 teacher: 1 stage: sketch lr: 0.000264
batch 270 loss: 0.43828 acc: 0.87305 | v_loss: 0.77786 v_acc: 0.82812 |  iteration: 1514 teacher: 0 stage: sketch lr: 0.000264
batch 271 loss: 0.49360 acc: 0.85905 | v_loss: 0.65613 v_acc: 0.84635 |  iteration: 1515 teacher: 0 stage: sketch lr: 0.000265
batch 272 loss: 0.48532 acc: 0.85775 | v_loss: 0.65598 v_acc: 0.83984 |  iteration: 1516 teacher: 0 stage: sketch lr: 0.000265
batch 273 loss: 0.41781 acc: 0.86784 | v_loss: 0.65909 v_acc: 0.85352 |  iteration: 1517 teacher: 0 stage: sketch lr: 0.000265
batch 274 loss: 0.49151 acc: 0.86296 | v_loss: 0.57329 v_acc: 0.85775 |  iteration: 1518 teacher: 1 stage: sket

batch 332 loss: 0.49396 acc: 0.85872 | v_loss: 0.48010 v_acc: 0.86751 |  iteration: 1576 teacher: 1 stage: sketch lr: 0.000275
batch 333 loss: 0.45976 acc: 0.86686 | v_loss: 0.42366 v_acc: 0.88835 |  iteration: 1577 teacher: 1 stage: sketch lr: 0.000275
batch 334 loss: 0.42307 acc: 0.86719 | v_loss: 0.65203 v_acc: 0.85254 |  iteration: 1578 teacher: 0 stage: sketch lr: 0.000276
batch 335 loss: 0.40318 acc: 0.87858 | v_loss: 0.69215 v_acc: 0.84863 |  iteration: 1579 teacher: 1 stage: sketch lr: 0.000276
batch 336 loss: 0.49454 acc: 0.85872 | v_loss: 0.75087 v_acc: 0.83040 |  iteration: 1580 teacher: 1 stage: sketch lr: 0.000276
batch 337 loss: 0.40831 acc: 0.88118 | v_loss: 0.55120 v_acc: 0.86621 |  iteration: 1581 teacher: 0 stage: sketch lr: 0.000276
batch 338 loss: 0.34893 acc: 0.89225 | v_loss: 0.67814 v_acc: 0.84082 |  iteration: 1582 teacher: 0 stage: sketch lr: 0.000276
batch 339 loss: 0.41857 acc: 0.87923 | v_loss: 0.76847 v_acc: 0.82194 |  iteration: 1583 teacher: 1 stage: sket

batch 397 loss: 0.46019 acc: 0.86947 | v_loss: 0.61831 v_acc: 0.85645 |  iteration: 1641 teacher: 0 stage: sketch lr: 0.000287
batch 398 loss: 0.38144 acc: 0.88672 | v_loss: 0.66115 v_acc: 0.84440 |  iteration: 1642 teacher: 1 stage: sketch lr: 0.000287
batch 399 loss: 0.41262 acc: 0.88509 | v_loss: 0.73838 v_acc: 0.82878 |  iteration: 1643 teacher: 0 stage: sketch lr: 0.000287
batch 400 loss: 0.51547 acc: 0.85319 | v_loss: 0.55950 v_acc: 0.85254 |  iteration: 1644 teacher: 0 stage: sketch lr: 0.000287
batch 401 loss: 0.32592 acc: 0.90202 | v_loss: 0.64710 v_acc: 0.83757 |  iteration: 1645 teacher: 1 stage: sketch lr: 0.000287
batch 402 loss: 0.37418 acc: 0.88379 | v_loss: 0.66793 v_acc: 0.84408 |  iteration: 1646 teacher: 1 stage: sketch lr: 0.000288
batch 403 loss: 0.38738 acc: 0.88086 | v_loss: 0.83642 v_acc: 0.82715 |  iteration: 1647 teacher: 0 stage: sketch lr: 0.000288
batch 404 loss: 0.41106 acc: 0.88249 | v_loss: 0.65709 v_acc: 0.85514 |  iteration: 1648 teacher: 0 stage: sket

batch 462 loss: 0.44335 acc: 0.86491 | v_loss: 0.68312 v_acc: 0.84408 |  iteration: 1706 teacher: 1 stage: sketch lr: 0.000298
batch 463 loss: 0.37329 acc: 0.89681 | v_loss: 0.77293 v_acc: 0.83268 |  iteration: 1707 teacher: 1 stage: sketch lr: 0.000298
batch 464 loss: 0.46894 acc: 0.86816 | v_loss: 0.91217 v_acc: 0.81803 |  iteration: 1708 teacher: 0 stage: sketch lr: 0.000298
batch 465 loss: 0.48010 acc: 0.86393 | v_loss: 0.53033 v_acc: 0.87533 |  iteration: 1709 teacher: 0 stage: sketch lr: 0.000299
batch 466 loss: 0.38359 acc: 0.88118 | v_loss: 0.61778 v_acc: 0.85091 |  iteration: 1710 teacher: 0 stage: sketch lr: 0.000299
batch 467 loss: 0.37771 acc: 0.88346 | v_loss: 0.61420 v_acc: 0.85775 |  iteration: 1711 teacher: 1 stage: sketch lr: 0.000299
batch 468 loss: 0.40303 acc: 0.88053 | v_loss: 0.51522 v_acc: 0.87142 |  iteration: 1712 teacher: 0 stage: sketch lr: 0.000299
batch 469 loss: 0.43811 acc: 0.86458 | v_loss: 1.08017 v_acc: 0.79622 |  iteration: 1713 teacher: 1 stage: sket

batch 527 loss: 0.29899 acc: 0.90560 | v_loss: 0.57484 v_acc: 0.86686 |  iteration: 1771 teacher: 1 stage: sketch lr: 0.000309
batch 528 loss: 0.40292 acc: 0.88867 | v_loss: 0.90468 v_acc: 0.83138 |  iteration: 1772 teacher: 1 stage: sketch lr: 0.000310
batch 529 loss: 0.41575 acc: 0.87695 | v_loss: 0.59856 v_acc: 0.85710 |  iteration: 1773 teacher: 0 stage: sketch lr: 0.000310
batch 530 loss: 0.39317 acc: 0.88314 | v_loss: 0.68462 v_acc: 0.84701 |  iteration: 1774 teacher: 0 stage: sketch lr: 0.000310
batch 531 loss: 0.43592 acc: 0.87663 | v_loss: 0.48402 v_acc: 0.87565 |  iteration: 1775 teacher: 0 stage: sketch lr: 0.000310
batch 532 loss: 0.35530 acc: 0.89290 | v_loss: 0.63713 v_acc: 0.85449 |  iteration: 1776 teacher: 0 stage: sketch lr: 0.000310
batch 533 loss: 0.37744 acc: 0.88770 | v_loss: 0.87933 v_acc: 0.81999 |  iteration: 1777 teacher: 1 stage: sketch lr: 0.000310
batch 534 loss: 0.39274 acc: 0.88346 | v_loss: 0.59754 v_acc: 0.86719 |  iteration: 1778 teacher: 1 stage: sket

batch 592 loss: 0.45526 acc: 0.87988 | v_loss: 0.68678 v_acc: 0.84993 |  iteration: 1836 teacher: 1 stage: sketch lr: 0.000321
batch 593 loss: 0.41482 acc: 0.87370 | v_loss: 0.62644 v_acc: 0.85547 |  iteration: 1837 teacher: 1 stage: sketch lr: 0.000321
batch 594 loss: 0.37364 acc: 0.88965 | v_loss: 0.60120 v_acc: 0.85384 |  iteration: 1838 teacher: 1 stage: sketch lr: 0.000321
batch 595 loss: 0.34421 acc: 0.89290 | v_loss: 0.52808 v_acc: 0.87663 |  iteration: 1839 teacher: 0 stage: sketch lr: 0.000321
batch 596 loss: 0.35024 acc: 0.89909 | v_loss: 0.52580 v_acc: 0.88021 |  iteration: 1840 teacher: 0 stage: sketch lr: 0.000321
batch 597 loss: 0.37299 acc: 0.89388 | v_loss: 0.55973 v_acc: 0.87077 |  iteration: 1841 teacher: 0 stage: sketch lr: 0.000322
batch 598 loss: 0.40297 acc: 0.88737 | v_loss: 0.68344 v_acc: 0.83822 |  iteration: 1842 teacher: 1 stage: sketch lr: 0.000322
batch 599 loss: 0.38261 acc: 0.88835 | v_loss: 0.61234 v_acc: 0.87435 |  iteration: 1843 teacher: 1 stage: sket

batch 657 loss: 0.33363 acc: 0.89714 | v_loss: 0.67069 v_acc: 0.85124 |  iteration: 1901 teacher: 0 stage: sketch lr: 0.000332
batch 658 loss: 0.36153 acc: 0.89062 | v_loss: 0.47731 v_acc: 0.88770 |  iteration: 1902 teacher: 1 stage: sketch lr: 0.000332
batch 659 loss: 0.35511 acc: 0.88997 | v_loss: 0.64733 v_acc: 0.85710 |  iteration: 1903 teacher: 0 stage: sketch lr: 0.000332
batch 660 loss: 0.42462 acc: 0.87728 | v_loss: 0.42493 v_acc: 0.88281 |  iteration: 1904 teacher: 1 stage: sketch lr: 0.000333
batch 661 loss: 0.35596 acc: 0.89941 | v_loss: 0.97283 v_acc: 0.82943 |  iteration: 1905 teacher: 0 stage: sketch lr: 0.000333
batch 662 loss: 0.26356 acc: 0.91667 | v_loss: 0.60101 v_acc: 0.86751 |  iteration: 1906 teacher: 1 stage: sketch lr: 0.000333
batch 663 loss: 0.40433 acc: 0.88867 | v_loss: 0.67518 v_acc: 0.85579 |  iteration: 1907 teacher: 0 stage: sketch lr: 0.000333
batch 664 loss: 0.41662 acc: 0.88802 | v_loss: 0.46223 v_acc: 0.88965 |  iteration: 1908 teacher: 1 stage: sket

batch 722 loss: 0.35146 acc: 0.89323 | v_loss: 0.41203 v_acc: 0.90560 |  iteration: 1966 teacher: 0 stage: sketch lr: 0.000343
batch 723 loss: 0.42573 acc: 0.87956 | v_loss: 0.98776 v_acc: 0.81445 |  iteration: 1967 teacher: 1 stage: sketch lr: 0.000344
batch 724 loss: 0.35917 acc: 0.89616 | v_loss: 0.69282 v_acc: 0.85091 |  iteration: 1968 teacher: 0 stage: sketch lr: 0.000344
batch 725 loss: 0.36368 acc: 0.88965 | v_loss: 0.72245 v_acc: 0.84538 |  iteration: 1969 teacher: 0 stage: sketch lr: 0.000344
batch 726 loss: 0.39895 acc: 0.87467 | v_loss: 0.59836 v_acc: 0.87272 |  iteration: 1970 teacher: 0 stage: sketch lr: 0.000344
batch 727 loss: 0.41406 acc: 0.88314 | v_loss: 0.58720 v_acc: 0.86784 |  iteration: 1971 teacher: 1 stage: sketch lr: 0.000344
batch 728 loss: 0.37248 acc: 0.89941 | v_loss: 0.67629 v_acc: 0.85905 |  iteration: 1972 teacher: 1 stage: sketch lr: 0.000344
batch 729 loss: 0.37731 acc: 0.88672 | v_loss: 0.54354 v_acc: 0.87598 |  iteration: 1973 teacher: 0 stage: sket

batch 787 loss: 0.38657 acc: 0.89779 | v_loss: 0.41201 v_acc: 0.89290 |  iteration: 2031 teacher: 1 stage: sketch lr: 0.000355
batch 788 loss: 0.39663 acc: 0.88574 | v_loss: 0.35536 v_acc: 0.91016 |  iteration: 2032 teacher: 1 stage: sketch lr: 0.000355
batch 789 loss: 0.42370 acc: 0.86816 | v_loss: 0.59397 v_acc: 0.86654 |  iteration: 2033 teacher: 1 stage: sketch lr: 0.000355
batch 790 loss: 0.34658 acc: 0.88965 | v_loss: 0.62923 v_acc: 0.86621 |  iteration: 2034 teacher: 0 stage: sketch lr: 0.000355
batch 791 loss: 0.33134 acc: 0.90072 | v_loss: 0.70755 v_acc: 0.85417 |  iteration: 2035 teacher: 0 stage: sketch lr: 0.000355
batch 792 loss: 0.33071 acc: 0.90365 | v_loss: 0.48331 v_acc: 0.88509 |  iteration: 2036 teacher: 1 stage: sketch lr: 0.000356
batch 793 loss: 0.29643 acc: 0.90527 | v_loss: 0.61709 v_acc: 0.85579 |  iteration: 2037 teacher: 0 stage: sketch lr: 0.000356
batch 794 loss: 0.38015 acc: 0.90072 | v_loss: 0.68105 v_acc: 0.85221 |  iteration: 2038 teacher: 1 stage: sket

batch 852 loss: 0.36028 acc: 0.89030 | v_loss: 0.56111 v_acc: 0.87044 |  iteration: 2096 teacher: 0 stage: sketch lr: 0.000366
batch 853 loss: 0.36555 acc: 0.88997 | v_loss: 0.60035 v_acc: 0.85482 |  iteration: 2097 teacher: 0 stage: sketch lr: 0.000366
batch 854 loss: 0.34391 acc: 0.89941 | v_loss: 0.73059 v_acc: 0.83398 |  iteration: 2098 teacher: 0 stage: sketch lr: 0.000367
batch 855 loss: 0.29350 acc: 0.91732 | v_loss: 0.51332 v_acc: 0.87467 |  iteration: 2099 teacher: 1 stage: sketch lr: 0.000367
batch 856 loss: 0.34323 acc: 0.89909 | v_loss: 0.57842 v_acc: 0.86035 |  iteration: 2100 teacher: 1 stage: sketch lr: 0.000367
batch 857 loss: 0.26754 acc: 0.91667 | v_loss: 0.59747 v_acc: 0.86263 |  iteration: 2101 teacher: 0 stage: sketch lr: 0.000367
batch 858 loss: 0.38932 acc: 0.88900 | v_loss: 0.82161 v_acc: 0.84245 |  iteration: 2102 teacher: 1 stage: sketch lr: 0.000367
batch 859 loss: 0.36509 acc: 0.89486 | v_loss: 0.59402 v_acc: 0.86947 |  iteration: 2103 teacher: 1 stage: sket

batch 917 loss: 0.32179 acc: 0.90495 | v_loss: 0.58954 v_acc: 0.85449 |  iteration: 2161 teacher: 0 stage: sketch lr: 0.000378
batch 918 loss: 0.39522 acc: 0.87988 | v_loss: 0.63941 v_acc: 0.86230 |  iteration: 2162 teacher: 0 stage: sketch lr: 0.000378
batch 919 loss: 0.24906 acc: 0.92643 | v_loss: 0.84303 v_acc: 0.84115 |  iteration: 2163 teacher: 1 stage: sketch lr: 0.000378
batch 920 loss: 0.32825 acc: 0.90137 | v_loss: 0.46469 v_acc: 0.89551 |  iteration: 2164 teacher: 1 stage: sketch lr: 0.000378
batch 921 loss: 0.31088 acc: 0.90690 | v_loss: 0.63754 v_acc: 0.86849 |  iteration: 2165 teacher: 0 stage: sketch lr: 0.000378
batch 922 loss: 0.44162 acc: 0.87598 | v_loss: 0.58376 v_acc: 0.88249 |  iteration: 2166 teacher: 0 stage: sketch lr: 0.000378
batch 923 loss: 0.32072 acc: 0.90169 | v_loss: 0.49462 v_acc: 0.89258 |  iteration: 2167 teacher: 0 stage: sketch lr: 0.000379
batch 924 loss: 0.35398 acc: 0.90007 | v_loss: 1.13279 v_acc: 0.80566 |  iteration: 2168 teacher: 1 stage: sket

batch 982 loss: 0.31623 acc: 0.91211 | v_loss: 0.54711 v_acc: 0.87728 |  iteration: 2226 teacher: 0 stage: sketch lr: 0.000389
batch 983 loss: 0.30935 acc: 0.91536 | v_loss: 0.84328 v_acc: 0.84342 |  iteration: 2227 teacher: 0 stage: sketch lr: 0.000389
batch 984 loss: 0.32909 acc: 0.90755 | v_loss: 0.58689 v_acc: 0.86426 |  iteration: 2228 teacher: 1 stage: sketch lr: 0.000389
batch 985 loss: 0.33061 acc: 0.90397 | v_loss: 0.67563 v_acc: 0.86296 |  iteration: 2229 teacher: 1 stage: sketch lr: 0.000389
batch 986 loss: 0.35448 acc: 0.89844 | v_loss: 0.41641 v_acc: 0.89909 |  iteration: 2230 teacher: 0 stage: sketch lr: 0.000390
batch 987 loss: 0.29119 acc: 0.91081 | v_loss: 0.56413 v_acc: 0.86979 |  iteration: 2231 teacher: 0 stage: sketch lr: 0.000390
batch 988 loss: 0.31221 acc: 0.90755 | v_loss: 0.85879 v_acc: 0.82357 |  iteration: 2232 teacher: 1 stage: sketch lr: 0.000390
batch 989 loss: 0.39887 acc: 0.89128 | v_loss: 0.51726 v_acc: 0.88932 |  iteration: 2233 teacher: 1 stage: sket

batch 1047 loss: 0.27514 acc: 0.91960 | v_loss: 0.62476 v_acc: 0.87402 |  iteration: 2291 teacher: 0 stage: sketch lr: 0.000400
batch 1048 loss: 0.33965 acc: 0.90527 | v_loss: 0.54909 v_acc: 0.86914 |  iteration: 2292 teacher: 1 stage: sketch lr: 0.000400
batch 1049 loss: 0.25717 acc: 0.92285 | v_loss: 0.58579 v_acc: 0.87142 |  iteration: 2293 teacher: 1 stage: sketch lr: 0.000401
batch 1050 loss: 0.37772 acc: 0.89714 | v_loss: 0.48664 v_acc: 0.89095 |  iteration: 2294 teacher: 0 stage: sketch lr: 0.000401
batch 1051 loss: 0.35160 acc: 0.89779 | v_loss: 0.47371 v_acc: 0.88802 |  iteration: 2295 teacher: 0 stage: sketch lr: 0.000401
batch 1052 loss: 0.41386 acc: 0.87663 | v_loss: 0.55222 v_acc: 0.87923 |  iteration: 2296 teacher: 1 stage: sketch lr: 0.000401
batch 1053 loss: 0.42349 acc: 0.88672 | v_loss: 0.66924 v_acc: 0.84473 |  iteration: 2297 teacher: 1 stage: sketch lr: 0.000401
batch 1054 loss: 0.41514 acc: 0.88314 | v_loss: 0.62574 v_acc: 0.86719 |  iteration: 2298 teacher: 1 sta

batch 1112 loss: 0.22985 acc: 0.93132 | v_loss: 0.64866 v_acc: 0.86654 |  iteration: 2356 teacher: 0 stage: sketch lr: 0.000412
batch 1113 loss: 0.35309 acc: 0.90430 | v_loss: 0.50536 v_acc: 0.89909 |  iteration: 2357 teacher: 1 stage: sketch lr: 0.000412
batch 1114 loss: 0.31197 acc: 0.91243 | v_loss: 0.71371 v_acc: 0.86458 |  iteration: 2358 teacher: 1 stage: sketch lr: 0.000412
batch 1115 loss: 0.36486 acc: 0.90137 | v_loss: 0.44961 v_acc: 0.88965 |  iteration: 2359 teacher: 0 stage: sketch lr: 0.000412
batch 1116 loss: 0.32849 acc: 0.91146 | v_loss: 1.04886 v_acc: 0.82585 |  iteration: 2360 teacher: 1 stage: sketch lr: 0.000412
batch 1117 loss: 0.37900 acc: 0.89844 | v_loss: 0.59759 v_acc: 0.87598 |  iteration: 2361 teacher: 0 stage: sketch lr: 0.000412
batch 1118 loss: 0.40518 acc: 0.87956 | v_loss: 0.69010 v_acc: 0.86751 |  iteration: 2362 teacher: 0 stage: sketch lr: 0.000413
batch 1119 loss: 0.25188 acc: 0.92936 | v_loss: 0.45959 v_acc: 0.88639 |  iteration: 2363 teacher: 1 sta

batch 1177 loss: 0.32431 acc: 0.90723 | v_loss: 0.37090 v_acc: 0.91536 |  iteration: 2421 teacher: 0 stage: sketch lr: 0.000423
batch 1178 loss: 0.27415 acc: 0.92383 | v_loss: 0.92485 v_acc: 0.82292 |  iteration: 2422 teacher: 1 stage: sketch lr: 0.000423
batch 1179 loss: 0.33941 acc: 0.89714 | v_loss: 0.66032 v_acc: 0.86458 |  iteration: 2423 teacher: 0 stage: sketch lr: 0.000423
batch 1180 loss: 0.32877 acc: 0.90625 | v_loss: 0.69121 v_acc: 0.85384 |  iteration: 2424 teacher: 1 stage: sketch lr: 0.000423
batch 1181 loss: 0.31826 acc: 0.90983 | v_loss: 0.52723 v_acc: 0.89974 |  iteration: 2425 teacher: 1 stage: sketch lr: 0.000424
batch 1182 loss: 0.26015 acc: 0.92415 | v_loss: 0.54540 v_acc: 0.88411 |  iteration: 2426 teacher: 0 stage: sketch lr: 0.000424
batch 1183 loss: 0.29959 acc: 0.91504 | v_loss: 0.64759 v_acc: 0.87305 |  iteration: 2427 teacher: 0 stage: sketch lr: 0.000424
batch 1184 loss: 0.27325 acc: 0.92708 | v_loss: 0.54709 v_acc: 0.88835 |  iteration: 2428 teacher: 0 sta

batch 1242 loss: 0.22144 acc: 0.93197 | v_loss: 0.39864 v_acc: 0.90072 |  iteration: 2486 teacher: 1 stage: sketch lr: 0.000434
epoch 1 loss: 0.40270 acc: 0.88410 | v_loss: 0.63200 v_acc: 0.85740 
epoch: 2
__________________________________________
batch 0 loss: 0.26208 acc: 0.92546 | v_loss: 0.63468 v_acc: 0.86816 |  iteration: 2487 teacher: 1 stage: sketch lr: 0.000434
batch 1 loss: 0.25218 acc: 0.92643 | v_loss: 0.53681 v_acc: 0.88574 |  iteration: 2488 teacher: 1 stage: sketch lr: 0.000435
batch 2 loss: 0.29199 acc: 0.91927 | v_loss: 0.54543 v_acc: 0.88770 |  iteration: 2489 teacher: 1 stage: sketch lr: 0.000435
batch 3 loss: 0.30515 acc: 0.91471 | v_loss: 0.51969 v_acc: 0.90169 |  iteration: 2490 teacher: 0 stage: sketch lr: 0.000435
batch 4 loss: 0.39324 acc: 0.89518 | v_loss: 0.46453 v_acc: 0.89746 |  iteration: 2491 teacher: 1 stage: sketch lr: 0.000435
batch 5 loss: 0.31834 acc: 0.90527 | v_loss: 0.40818 v_acc: 0.90820 |  iteration: 2492 teacher: 1 stage: sketch lr: 0.000435
b

batch 64 loss: 0.28152 acc: 0.91211 | v_loss: 0.44258 v_acc: 0.89421 |  iteration: 2551 teacher: 0 stage: sketch lr: 0.000446
batch 65 loss: 0.36608 acc: 0.88997 | v_loss: 0.56964 v_acc: 0.87370 |  iteration: 2552 teacher: 0 stage: sketch lr: 0.000446
batch 66 loss: 0.29786 acc: 0.90723 | v_loss: 0.67274 v_acc: 0.85742 |  iteration: 2553 teacher: 0 stage: sketch lr: 0.000446
batch 67 loss: 0.28482 acc: 0.91797 | v_loss: 0.68829 v_acc: 0.84831 |  iteration: 2554 teacher: 1 stage: sketch lr: 0.000446
batch 68 loss: 0.36219 acc: 0.90755 | v_loss: 0.51595 v_acc: 0.89779 |  iteration: 2555 teacher: 1 stage: sketch lr: 0.000446
batch 69 loss: 0.24840 acc: 0.92806 | v_loss: 0.42749 v_acc: 0.90820 |  iteration: 2556 teacher: 0 stage: sketch lr: 0.000447
batch 70 loss: 0.37676 acc: 0.90104 | v_loss: 0.33211 v_acc: 0.91667 |  iteration: 2557 teacher: 0 stage: sketch lr: 0.000447
batch 71 loss: 0.28062 acc: 0.92285 | v_loss: 0.48864 v_acc: 0.90462 |  iteration: 2558 teacher: 1 stage: sketch lr: 0

batch 129 loss: 0.24509 acc: 0.93359 | v_loss: 0.57378 v_acc: 0.87370 |  iteration: 2616 teacher: 1 stage: sketch lr: 0.000457
batch 130 loss: 0.29890 acc: 0.91048 | v_loss: 0.75215 v_acc: 0.85352 |  iteration: 2617 teacher: 0 stage: sketch lr: 0.000457
batch 131 loss: 0.34749 acc: 0.90202 | v_loss: 0.56102 v_acc: 0.89095 |  iteration: 2618 teacher: 0 stage: sketch lr: 0.000457
batch 132 loss: 0.25698 acc: 0.92871 | v_loss: 0.57828 v_acc: 0.89095 |  iteration: 2619 teacher: 1 stage: sketch lr: 0.000458
batch 133 loss: 0.27619 acc: 0.92025 | v_loss: 0.65430 v_acc: 0.87630 |  iteration: 2620 teacher: 0 stage: sketch lr: 0.000458
batch 134 loss: 0.21617 acc: 0.93164 | v_loss: 0.66097 v_acc: 0.85124 |  iteration: 2621 teacher: 0 stage: sketch lr: 0.000458
batch 135 loss: 0.30063 acc: 0.91960 | v_loss: 0.45886 v_acc: 0.89909 |  iteration: 2622 teacher: 1 stage: sketch lr: 0.000458
batch 136 loss: 0.32563 acc: 0.90332 | v_loss: 0.76669 v_acc: 0.84993 |  iteration: 2623 teacher: 0 stage: sket

batch 194 loss: 0.27520 acc: 0.92220 | v_loss: 0.53691 v_acc: 0.88542 |  iteration: 2681 teacher: 1 stage: sketch lr: 0.000468
batch 195 loss: 0.43378 acc: 0.87305 | v_loss: 0.41731 v_acc: 0.90202 |  iteration: 2682 teacher: 1 stage: sketch lr: 0.000469
batch 196 loss: 0.25468 acc: 0.92220 | v_loss: 1.12332 v_acc: 0.81087 |  iteration: 2683 teacher: 1 stage: sketch lr: 0.000469
batch 197 loss: 0.25910 acc: 0.92546 | v_loss: 0.37500 v_acc: 0.92057 |  iteration: 2684 teacher: 0 stage: sketch lr: 0.000469
batch 198 loss: 0.30682 acc: 0.91341 | v_loss: 0.61005 v_acc: 0.87923 |  iteration: 2685 teacher: 0 stage: sketch lr: 0.000469
batch 199 loss: 0.32468 acc: 0.90853 | v_loss: 0.51104 v_acc: 0.89160 |  iteration: 2686 teacher: 0 stage: sketch lr: 0.000469
batch 200 loss: 0.28932 acc: 0.92253 | v_loss: 0.42784 v_acc: 0.90104 |  iteration: 2687 teacher: 1 stage: sketch lr: 0.000469
batch 201 loss: 0.28954 acc: 0.91146 | v_loss: 0.53150 v_acc: 0.88379 |  iteration: 2688 teacher: 1 stage: sket

batch 259 loss: 0.26611 acc: 0.92318 | v_loss: 0.55477 v_acc: 0.87500 |  iteration: 2746 teacher: 0 stage: sketch lr: 0.000480
batch 260 loss: 0.29466 acc: 0.91178 | v_loss: 0.85863 v_acc: 0.83366 |  iteration: 2747 teacher: 1 stage: sketch lr: 0.000480
batch 261 loss: 0.29559 acc: 0.91341 | v_loss: 0.53876 v_acc: 0.89128 |  iteration: 2748 teacher: 0 stage: sketch lr: 0.000480
batch 262 loss: 0.25528 acc: 0.92415 | v_loss: 0.52611 v_acc: 0.89062 |  iteration: 2749 teacher: 1 stage: sketch lr: 0.000480
batch 263 loss: 0.33451 acc: 0.90592 | v_loss: 0.53029 v_acc: 0.88216 |  iteration: 2750 teacher: 0 stage: sketch lr: 0.000480
batch 264 loss: 0.32715 acc: 0.91243 | v_loss: 0.47313 v_acc: 0.89193 |  iteration: 2751 teacher: 1 stage: sketch lr: 0.000481
batch 265 loss: 0.29653 acc: 0.91699 | v_loss: 0.49693 v_acc: 0.89193 |  iteration: 2752 teacher: 1 stage: sketch lr: 0.000481
batch 266 loss: 0.25531 acc: 0.92448 | v_loss: 0.65335 v_acc: 0.86133 |  iteration: 2753 teacher: 0 stage: sket

batch 324 loss: 0.21519 acc: 0.93717 | v_loss: 0.53904 v_acc: 0.88053 |  iteration: 2811 teacher: 0 stage: sketch lr: 0.000491
batch 325 loss: 0.32310 acc: 0.90788 | v_loss: 0.60114 v_acc: 0.86068 |  iteration: 2812 teacher: 0 stage: sketch lr: 0.000491
batch 326 loss: 0.29543 acc: 0.92220 | v_loss: 0.59581 v_acc: 0.88053 |  iteration: 2813 teacher: 1 stage: sketch lr: 0.000491
batch 327 loss: 0.28964 acc: 0.91178 | v_loss: 0.62900 v_acc: 0.87109 |  iteration: 2814 teacher: 1 stage: sketch lr: 0.000492
batch 328 loss: 0.32886 acc: 0.90592 | v_loss: 0.39670 v_acc: 0.90853 |  iteration: 2815 teacher: 0 stage: sketch lr: 0.000492
batch 329 loss: 0.25813 acc: 0.92480 | v_loss: 0.61433 v_acc: 0.87826 |  iteration: 2816 teacher: 1 stage: sketch lr: 0.000492
batch 330 loss: 0.32123 acc: 0.90885 | v_loss: 0.53155 v_acc: 0.87695 |  iteration: 2817 teacher: 0 stage: sketch lr: 0.000492
batch 331 loss: 0.34690 acc: 0.90072 | v_loss: 0.87500 v_acc: 0.83138 |  iteration: 2818 teacher: 0 stage: sket

batch 389 loss: 0.37825 acc: 0.89518 | v_loss: 0.59387 v_acc: 0.87500 |  iteration: 2876 teacher: 1 stage: sketch lr: 0.000502
batch 390 loss: 0.32508 acc: 0.90755 | v_loss: 0.66570 v_acc: 0.86556 |  iteration: 2877 teacher: 1 stage: sketch lr: 0.000503
batch 391 loss: 0.37845 acc: 0.90462 | v_loss: 0.43218 v_acc: 0.89323 |  iteration: 2878 teacher: 1 stage: sketch lr: 0.000503
batch 392 loss: 0.22198 acc: 0.93457 | v_loss: 0.67209 v_acc: 0.85449 |  iteration: 2879 teacher: 1 stage: sketch lr: 0.000503
batch 393 loss: 0.29924 acc: 0.91341 | v_loss: 0.47218 v_acc: 0.88086 |  iteration: 2880 teacher: 1 stage: sketch lr: 0.000503
batch 394 loss: 0.33955 acc: 0.89876 | v_loss: 0.48930 v_acc: 0.88802 |  iteration: 2881 teacher: 0 stage: sketch lr: 0.000503
batch 395 loss: 0.28752 acc: 0.92122 | v_loss: 0.46670 v_acc: 0.88574 |  iteration: 2882 teacher: 0 stage: sketch lr: 0.000503
batch 396 loss: 0.30668 acc: 0.90397 | v_loss: 0.72466 v_acc: 0.84961 |  iteration: 2883 teacher: 0 stage: sket

batch 454 loss: 0.28341 acc: 0.92383 | v_loss: 0.49331 v_acc: 0.89095 |  iteration: 2941 teacher: 1 stage: sketch lr: 0.000514
batch 455 loss: 0.26874 acc: 0.92318 | v_loss: 0.61262 v_acc: 0.88053 |  iteration: 2942 teacher: 1 stage: sketch lr: 0.000514
batch 456 loss: 0.21251 acc: 0.93587 | v_loss: 0.51760 v_acc: 0.88737 |  iteration: 2943 teacher: 0 stage: sketch lr: 0.000514
batch 457 loss: 0.27181 acc: 0.91797 | v_loss: 0.50042 v_acc: 0.89258 |  iteration: 2944 teacher: 0 stage: sketch lr: 0.000514
batch 458 loss: 0.31211 acc: 0.90918 | v_loss: 0.48519 v_acc: 0.90723 |  iteration: 2945 teacher: 1 stage: sketch lr: 0.000514
batch 459 loss: 0.38826 acc: 0.89128 | v_loss: 0.46951 v_acc: 0.90169 |  iteration: 2946 teacher: 1 stage: sketch lr: 0.000515
batch 460 loss: 0.30505 acc: 0.91536 | v_loss: 0.36315 v_acc: 0.92676 |  iteration: 2947 teacher: 1 stage: sketch lr: 0.000515
batch 461 loss: 0.18972 acc: 0.94694 | v_loss: 0.68588 v_acc: 0.86914 |  iteration: 2948 teacher: 1 stage: sket

batch 519 loss: 0.28015 acc: 0.92090 | v_loss: 0.41027 v_acc: 0.90267 |  iteration: 3006 teacher: 1 stage: sketch lr: 0.000525
batch 520 loss: 0.24355 acc: 0.92839 | v_loss: 0.50926 v_acc: 0.86947 |  iteration: 3007 teacher: 1 stage: sketch lr: 0.000525
batch 521 loss: 0.34112 acc: 0.90267 | v_loss: 0.60109 v_acc: 0.86556 |  iteration: 3008 teacher: 1 stage: sketch lr: 0.000525
batch 522 loss: 0.30813 acc: 0.92057 | v_loss: 0.66522 v_acc: 0.85579 |  iteration: 3009 teacher: 0 stage: sketch lr: 0.000526
batch 523 loss: 0.31322 acc: 0.90690 | v_loss: 0.48938 v_acc: 0.89714 |  iteration: 3010 teacher: 0 stage: sketch lr: 0.000526
batch 524 loss: 0.25802 acc: 0.92611 | v_loss: 0.40317 v_acc: 0.90560 |  iteration: 3011 teacher: 1 stage: sketch lr: 0.000526
batch 525 loss: 0.35078 acc: 0.91276 | v_loss: 0.29984 v_acc: 0.92188 |  iteration: 3012 teacher: 1 stage: sketch lr: 0.000526
batch 526 loss: 0.29925 acc: 0.91634 | v_loss: 0.47186 v_acc: 0.90788 |  iteration: 3013 teacher: 1 stage: sket

batch 584 loss: 0.33499 acc: 0.89616 | v_loss: 0.57978 v_acc: 0.86068 |  iteration: 3071 teacher: 0 stage: sketch lr: 0.000536
batch 585 loss: 0.28709 acc: 0.91374 | v_loss: 0.74611 v_acc: 0.85254 |  iteration: 3072 teacher: 1 stage: sketch lr: 0.000537
batch 586 loss: 0.32442 acc: 0.90267 | v_loss: 0.53750 v_acc: 0.89095 |  iteration: 3073 teacher: 0 stage: sketch lr: 0.000537
batch 587 loss: 0.27135 acc: 0.92318 | v_loss: 0.59807 v_acc: 0.87956 |  iteration: 3074 teacher: 1 stage: sketch lr: 0.000537
batch 588 loss: 0.26271 acc: 0.92057 | v_loss: 0.66508 v_acc: 0.88281 |  iteration: 3075 teacher: 0 stage: sketch lr: 0.000537
batch 589 loss: 0.26321 acc: 0.92871 | v_loss: 0.58629 v_acc: 0.86458 |  iteration: 3076 teacher: 1 stage: sketch lr: 0.000537
batch 590 loss: 0.40721 acc: 0.89095 | v_loss: 0.43217 v_acc: 0.90495 |  iteration: 3077 teacher: 0 stage: sketch lr: 0.000538
batch 591 loss: 0.25163 acc: 0.92708 | v_loss: 0.83566 v_acc: 0.83984 |  iteration: 3078 teacher: 0 stage: sket

batch 649 loss: 0.32156 acc: 0.90755 | v_loss: 0.55199 v_acc: 0.89486 |  iteration: 3136 teacher: 0 stage: sketch lr: 0.000548
batch 650 loss: 0.28622 acc: 0.92415 | v_loss: 0.44416 v_acc: 0.91178 |  iteration: 3137 teacher: 1 stage: sketch lr: 0.000548
batch 651 loss: 0.24865 acc: 0.93229 | v_loss: 1.19059 v_acc: 0.79850 |  iteration: 3138 teacher: 1 stage: sketch lr: 0.000548
batch 652 loss: 0.28754 acc: 0.91862 | v_loss: 0.40330 v_acc: 0.91243 |  iteration: 3139 teacher: 0 stage: sketch lr: 0.000548
batch 653 loss: 0.22799 acc: 0.93750 | v_loss: 0.58206 v_acc: 0.87500 |  iteration: 3140 teacher: 0 stage: sketch lr: 0.000549
batch 654 loss: 0.27657 acc: 0.92480 | v_loss: 0.51716 v_acc: 0.88574 |  iteration: 3141 teacher: 1 stage: sketch lr: 0.000549
batch 655 loss: 0.30323 acc: 0.91439 | v_loss: 0.41455 v_acc: 0.90365 |  iteration: 3142 teacher: 1 stage: sketch lr: 0.000549
batch 656 loss: 0.35625 acc: 0.89128 | v_loss: 0.47504 v_acc: 0.89193 |  iteration: 3143 teacher: 0 stage: sket

batch 714 loss: 0.30463 acc: 0.90951 | v_loss: 0.55708 v_acc: 0.87891 |  iteration: 3201 teacher: 1 stage: sketch lr: 0.000559
batch 715 loss: 0.27575 acc: 0.92318 | v_loss: 0.73054 v_acc: 0.85254 |  iteration: 3202 teacher: 0 stage: sketch lr: 0.000559
batch 716 loss: 0.28547 acc: 0.91927 | v_loss: 0.51090 v_acc: 0.88835 |  iteration: 3203 teacher: 1 stage: sketch lr: 0.000560
batch 717 loss: 0.27021 acc: 0.92611 | v_loss: 0.50033 v_acc: 0.89193 |  iteration: 3204 teacher: 1 stage: sketch lr: 0.000560
batch 718 loss: 0.36940 acc: 0.90169 | v_loss: 0.47690 v_acc: 0.88639 |  iteration: 3205 teacher: 0 stage: sketch lr: 0.000560
batch 719 loss: 0.34242 acc: 0.90332 | v_loss: 0.43648 v_acc: 0.90332 |  iteration: 3206 teacher: 0 stage: sketch lr: 0.000560
batch 720 loss: 0.28512 acc: 0.92969 | v_loss: 0.47525 v_acc: 0.89551 |  iteration: 3207 teacher: 1 stage: sketch lr: 0.000560
batch 721 loss: 0.27151 acc: 0.91471 | v_loss: 0.58724 v_acc: 0.86784 |  iteration: 3208 teacher: 0 stage: sket

batch 779 loss: 0.33733 acc: 0.91081 | v_loss: 0.52486 v_acc: 0.89030 |  iteration: 3266 teacher: 1 stage: sketch lr: 0.000571
batch 780 loss: 0.26601 acc: 0.91764 | v_loss: 0.63433 v_acc: 0.85254 |  iteration: 3267 teacher: 0 stage: sketch lr: 0.000571
batch 781 loss: 0.29695 acc: 0.91504 | v_loss: 0.56921 v_acc: 0.88802 |  iteration: 3268 teacher: 1 stage: sketch lr: 0.000571
batch 782 loss: 0.23866 acc: 0.92871 | v_loss: 0.62181 v_acc: 0.87793 |  iteration: 3269 teacher: 0 stage: sketch lr: 0.000571
batch 783 loss: 0.21482 acc: 0.93913 | v_loss: 0.40401 v_acc: 0.90951 |  iteration: 3270 teacher: 1 stage: sketch lr: 0.000571
batch 784 loss: 0.32002 acc: 0.91113 | v_loss: 0.52183 v_acc: 0.89160 |  iteration: 3271 teacher: 0 stage: sketch lr: 0.000571
batch 785 loss: 0.33400 acc: 0.91276 | v_loss: 0.52869 v_acc: 0.88086 |  iteration: 3272 teacher: 0 stage: sketch lr: 0.000572
batch 786 loss: 0.32658 acc: 0.90853 | v_loss: 0.89914 v_acc: 0.82650 |  iteration: 3273 teacher: 1 stage: sket

batch 844 loss: 0.32624 acc: 0.90397 | v_loss: 0.53033 v_acc: 0.88835 |  iteration: 3331 teacher: 0 stage: sketch lr: 0.000582
batch 845 loss: 0.31491 acc: 0.90560 | v_loss: 0.65073 v_acc: 0.86751 |  iteration: 3332 teacher: 0 stage: sketch lr: 0.000582
batch 846 loss: 0.27675 acc: 0.91829 | v_loss: 0.41342 v_acc: 0.89290 |  iteration: 3333 teacher: 0 stage: sketch lr: 0.000582
batch 847 loss: 0.34691 acc: 0.90365 | v_loss: 0.64761 v_acc: 0.85579 |  iteration: 3334 teacher: 1 stage: sketch lr: 0.000582
batch 848 loss: 0.31126 acc: 0.90788 | v_loss: 0.42502 v_acc: 0.90202 |  iteration: 3335 teacher: 0 stage: sketch lr: 0.000583
batch 849 loss: 0.29528 acc: 0.91667 | v_loss: 0.47263 v_acc: 0.88574 |  iteration: 3336 teacher: 0 stage: sketch lr: 0.000583
batch 850 loss: 0.27562 acc: 0.92546 | v_loss: 0.41858 v_acc: 0.89486 |  iteration: 3337 teacher: 0 stage: sketch lr: 0.000583
batch 851 loss: 0.24401 acc: 0.92936 | v_loss: 0.74132 v_acc: 0.85091 |  iteration: 3338 teacher: 0 stage: sket

batch 909 loss: 0.23153 acc: 0.93229 | v_loss: 0.50365 v_acc: 0.89876 |  iteration: 3396 teacher: 1 stage: sketch lr: 0.000593
batch 910 loss: 0.27747 acc: 0.91895 | v_loss: 0.61199 v_acc: 0.88932 |  iteration: 3397 teacher: 1 stage: sketch lr: 0.000593
batch 911 loss: 0.26202 acc: 0.92611 | v_loss: 0.52440 v_acc: 0.89355 |  iteration: 3398 teacher: 1 stage: sketch lr: 0.000594
batch 912 loss: 0.27956 acc: 0.92513 | v_loss: 0.49123 v_acc: 0.89681 |  iteration: 3399 teacher: 1 stage: sketch lr: 0.000594
batch 913 loss: 0.35323 acc: 0.90527 | v_loss: 0.51158 v_acc: 0.89974 |  iteration: 3400 teacher: 1 stage: sketch lr: 0.000594
batch 914 loss: 0.27578 acc: 0.92285 | v_loss: 0.42613 v_acc: 0.90267 |  iteration: 3401 teacher: 0 stage: sketch lr: 0.000594
batch 915 loss: 0.27309 acc: 0.92253 | v_loss: 0.34399 v_acc: 0.92318 |  iteration: 3402 teacher: 1 stage: sketch lr: 0.000594
batch 916 loss: 0.22345 acc: 0.93164 | v_loss: 0.63107 v_acc: 0.86816 |  iteration: 3403 teacher: 1 stage: sket

batch 974 loss: 0.34477 acc: 0.89876 | v_loss: 0.41821 v_acc: 0.90397 |  iteration: 3461 teacher: 0 stage: sketch lr: 0.000605
batch 975 loss: 0.25187 acc: 0.92546 | v_loss: 0.54970 v_acc: 0.87370 |  iteration: 3462 teacher: 0 stage: sketch lr: 0.000605
batch 976 loss: 0.27795 acc: 0.92188 | v_loss: 0.60102 v_acc: 0.86947 |  iteration: 3463 teacher: 0 stage: sketch lr: 0.000605
batch 977 loss: 0.23447 acc: 0.92904 | v_loss: 0.69268 v_acc: 0.85026 |  iteration: 3464 teacher: 0 stage: sketch lr: 0.000605
batch 978 loss: 0.32514 acc: 0.90918 | v_loss: 0.51097 v_acc: 0.89193 |  iteration: 3465 teacher: 0 stage: sketch lr: 0.000605
batch 979 loss: 0.28680 acc: 0.91699 | v_loss: 0.38748 v_acc: 0.90755 |  iteration: 3466 teacher: 1 stage: sketch lr: 0.000605
batch 980 loss: 0.32433 acc: 0.90592 | v_loss: 0.31063 v_acc: 0.91764 |  iteration: 3467 teacher: 0 stage: sketch lr: 0.000606
batch 981 loss: 0.33419 acc: 0.90723 | v_loss: 0.46862 v_acc: 0.89876 |  iteration: 3468 teacher: 1 stage: sket

batch 1039 loss: 0.27397 acc: 0.92350 | v_loss: 0.55685 v_acc: 0.86751 |  iteration: 3526 teacher: 0 stage: sketch lr: 0.000616
batch 1040 loss: 0.27463 acc: 0.92155 | v_loss: 0.77255 v_acc: 0.84733 |  iteration: 3527 teacher: 0 stage: sketch lr: 0.000616
batch 1041 loss: 0.28268 acc: 0.91569 | v_loss: 0.59494 v_acc: 0.88314 |  iteration: 3528 teacher: 1 stage: sketch lr: 0.000616
batch 1042 loss: 0.31770 acc: 0.90951 | v_loss: 0.59203 v_acc: 0.88118 |  iteration: 3529 teacher: 0 stage: sketch lr: 0.000616
batch 1043 loss: 0.28924 acc: 0.91927 | v_loss: 0.66956 v_acc: 0.87402 |  iteration: 3530 teacher: 1 stage: sketch lr: 0.000617
batch 1044 loss: 0.30099 acc: 0.90625 | v_loss: 0.62483 v_acc: 0.85645 |  iteration: 3531 teacher: 0 stage: sketch lr: 0.000617
batch 1045 loss: 0.32709 acc: 0.90202 | v_loss: 0.49533 v_acc: 0.89648 |  iteration: 3532 teacher: 0 stage: sketch lr: 0.000617
batch 1046 loss: 0.30966 acc: 0.90853 | v_loss: 0.80642 v_acc: 0.83301 |  iteration: 3533 teacher: 0 sta

batch 1104 loss: 0.28930 acc: 0.91341 | v_loss: 0.59002 v_acc: 0.89225 |  iteration: 3591 teacher: 0 stage: sketch lr: 0.000627
batch 1105 loss: 0.28520 acc: 0.91569 | v_loss: 0.40778 v_acc: 0.91569 |  iteration: 3592 teacher: 0 stage: sketch lr: 0.000627
batch 1106 loss: 0.33847 acc: 0.90234 | v_loss: 1.31748 v_acc: 0.80990 |  iteration: 3593 teacher: 1 stage: sketch lr: 0.000628
batch 1107 loss: 0.35548 acc: 0.90332 | v_loss: 0.39800 v_acc: 0.92253 |  iteration: 3594 teacher: 0 stage: sketch lr: 0.000628
batch 1108 loss: 0.22543 acc: 0.93913 | v_loss: 0.61576 v_acc: 0.88086 |  iteration: 3595 teacher: 0 stage: sketch lr: 0.000628
batch 1109 loss: 0.20353 acc: 0.94010 | v_loss: 0.54455 v_acc: 0.89095 |  iteration: 3596 teacher: 0 stage: sketch lr: 0.000628
batch 1110 loss: 0.28863 acc: 0.92318 | v_loss: 0.41994 v_acc: 0.90007 |  iteration: 3597 teacher: 0 stage: sketch lr: 0.000628
batch 1111 loss: 0.24657 acc: 0.91797 | v_loss: 0.51373 v_acc: 0.88346 |  iteration: 3598 teacher: 0 sta

batch 1169 loss: 0.31644 acc: 0.91211 | v_loss: 0.56057 v_acc: 0.88477 |  iteration: 3656 teacher: 1 stage: sketch lr: 0.000639
batch 1170 loss: 0.30082 acc: 0.91113 | v_loss: 0.76735 v_acc: 0.84017 |  iteration: 3657 teacher: 1 stage: sketch lr: 0.000639
batch 1171 loss: 0.33394 acc: 0.90690 | v_loss: 0.51727 v_acc: 0.90104 |  iteration: 3658 teacher: 0 stage: sketch lr: 0.000639
batch 1172 loss: 0.28772 acc: 0.91569 | v_loss: 0.53503 v_acc: 0.88542 |  iteration: 3659 teacher: 0 stage: sketch lr: 0.000639
batch 1173 loss: 0.22989 acc: 0.93066 | v_loss: 0.52409 v_acc: 0.88444 |  iteration: 3660 teacher: 0 stage: sketch lr: 0.000639
batch 1174 loss: 0.28115 acc: 0.92253 | v_loss: 0.44309 v_acc: 0.90332 |  iteration: 3661 teacher: 1 stage: sketch lr: 0.000640
batch 1175 loss: 0.36769 acc: 0.90007 | v_loss: 0.51179 v_acc: 0.89648 |  iteration: 3662 teacher: 1 stage: sketch lr: 0.000640
batch 1176 loss: 0.31208 acc: 0.90853 | v_loss: 0.57351 v_acc: 0.86230 |  iteration: 3663 teacher: 0 sta

batch 1234 loss: 0.29881 acc: 0.91536 | v_loss: 0.57767 v_acc: 0.86686 |  iteration: 3721 teacher: 0 stage: sketch lr: 0.000650
batch 1235 loss: 0.35050 acc: 0.89518 | v_loss: 0.63205 v_acc: 0.84668 |  iteration: 3722 teacher: 0 stage: sketch lr: 0.000650
batch 1236 loss: 0.30532 acc: 0.91146 | v_loss: 0.63047 v_acc: 0.87565 |  iteration: 3723 teacher: 0 stage: sketch lr: 0.000650
batch 1237 loss: 0.30596 acc: 0.91146 | v_loss: 0.56226 v_acc: 0.88867 |  iteration: 3724 teacher: 1 stage: sketch lr: 0.000651
batch 1238 loss: 0.39366 acc: 0.89225 | v_loss: 0.40287 v_acc: 0.90951 |  iteration: 3725 teacher: 1 stage: sketch lr: 0.000651
batch 1239 loss: 0.39402 acc: 0.88965 | v_loss: 0.54798 v_acc: 0.88444 |  iteration: 3726 teacher: 0 stage: sketch lr: 0.000651
batch 1240 loss: 0.35545 acc: 0.90592 | v_loss: 0.52228 v_acc: 0.88021 |  iteration: 3727 teacher: 0 stage: sketch lr: 0.000651
batch 1241 loss: 0.36155 acc: 0.89648 | v_loss: 0.82566 v_acc: 0.82943 |  iteration: 3728 teacher: 0 sta

batch 55 loss: 0.29536 acc: 0.92122 | v_loss: 0.40508 v_acc: 0.90983 |  iteration: 3785 teacher: 1 stage: sketch lr: 0.000661
batch 56 loss: 0.27539 acc: 0.91862 | v_loss: 0.58775 v_acc: 0.88770 |  iteration: 3786 teacher: 0 stage: sketch lr: 0.000661
batch 57 loss: 0.28089 acc: 0.91862 | v_loss: 0.47089 v_acc: 0.89258 |  iteration: 3787 teacher: 0 stage: sketch lr: 0.000662
batch 58 loss: 0.31454 acc: 0.90918 | v_loss: 0.92383 v_acc: 0.83398 |  iteration: 3788 teacher: 0 stage: sketch lr: 0.000662
batch 59 loss: 0.29112 acc: 0.91634 | v_loss: 0.41548 v_acc: 0.90560 |  iteration: 3789 teacher: 1 stage: sketch lr: 0.000662
batch 60 loss: 0.29281 acc: 0.91243 | v_loss: 0.35287 v_acc: 0.92676 |  iteration: 3790 teacher: 0 stage: sketch lr: 0.000662
batch 61 loss: 0.31661 acc: 0.90951 | v_loss: 0.57292 v_acc: 0.88118 |  iteration: 3791 teacher: 0 stage: sketch lr: 0.000662
batch 62 loss: 0.29479 acc: 0.91764 | v_loss: 0.60923 v_acc: 0.88314 |  iteration: 3792 teacher: 0 stage: sketch lr: 0

batch 120 loss: 0.32068 acc: 0.91569 | v_loss: 0.44080 v_acc: 0.89681 |  iteration: 3850 teacher: 1 stage: sketch lr: 0.000673
batch 121 loss: 0.29202 acc: 0.91536 | v_loss: 0.46216 v_acc: 0.88965 |  iteration: 3851 teacher: 0 stage: sketch lr: 0.000673
batch 122 loss: 0.34198 acc: 0.90007 | v_loss: 0.43586 v_acc: 0.88997 |  iteration: 3852 teacher: 0 stage: sketch lr: 0.000673
batch 123 loss: 0.25776 acc: 0.92220 | v_loss: 0.72142 v_acc: 0.85449 |  iteration: 3853 teacher: 1 stage: sketch lr: 0.000673
batch 124 loss: 0.26216 acc: 0.92415 | v_loss: 0.50499 v_acc: 0.89941 |  iteration: 3854 teacher: 0 stage: sketch lr: 0.000673
batch 125 loss: 0.29786 acc: 0.91309 | v_loss: 0.54392 v_acc: 0.88639 |  iteration: 3855 teacher: 0 stage: sketch lr: 0.000673
batch 126 loss: 0.26377 acc: 0.92546 | v_loss: 0.68637 v_acc: 0.85872 |  iteration: 3856 teacher: 1 stage: sketch lr: 0.000674
batch 127 loss: 0.33964 acc: 0.90820 | v_loss: 0.46810 v_acc: 0.89714 |  iteration: 3857 teacher: 0 stage: sket

batch 185 loss: 0.22493 acc: 0.93229 | v_loss: 0.51025 v_acc: 0.89323 |  iteration: 3915 teacher: 1 stage: sketch lr: 0.000684
batch 186 loss: 0.29042 acc: 0.91243 | v_loss: 0.46723 v_acc: 0.89290 |  iteration: 3916 teacher: 1 stage: sketch lr: 0.000684
batch 187 loss: 0.28586 acc: 0.91276 | v_loss: 0.35529 v_acc: 0.91243 |  iteration: 3917 teacher: 0 stage: sketch lr: 0.000684
batch 188 loss: 0.35797 acc: 0.90234 | v_loss: 0.67411 v_acc: 0.85677 |  iteration: 3918 teacher: 0 stage: sketch lr: 0.000684
batch 189 loss: 0.36388 acc: 0.89518 | v_loss: 0.57707 v_acc: 0.86068 |  iteration: 3919 teacher: 0 stage: sketch lr: 0.000685
batch 190 loss: 0.35681 acc: 0.90527 | v_loss: 0.60498 v_acc: 0.86784 |  iteration: 3920 teacher: 1 stage: sketch lr: 0.000685
batch 191 loss: 0.28021 acc: 0.91536 | v_loss: 0.85000 v_acc: 0.83919 |  iteration: 3921 teacher: 1 stage: sketch lr: 0.000685
batch 192 loss: 0.34284 acc: 0.89486 | v_loss: 0.44960 v_acc: 0.90267 |  iteration: 3922 teacher: 0 stage: sket

batch 250 loss: 0.34257 acc: 0.90332 | v_loss: 0.50343 v_acc: 0.90039 |  iteration: 3980 teacher: 1 stage: sketch lr: 0.000695
batch 251 loss: 0.31472 acc: 0.91081 | v_loss: 0.41766 v_acc: 0.90365 |  iteration: 3981 teacher: 1 stage: sketch lr: 0.000695
batch 252 loss: 0.26739 acc: 0.92513 | v_loss: 0.28475 v_acc: 0.92643 |  iteration: 3982 teacher: 0 stage: sketch lr: 0.000696
batch 253 loss: 0.27893 acc: 0.92188 | v_loss: 0.46634 v_acc: 0.89909 |  iteration: 3983 teacher: 1 stage: sketch lr: 0.000696
batch 254 loss: 0.23524 acc: 0.93555 | v_loss: 0.50950 v_acc: 0.88477 |  iteration: 3984 teacher: 1 stage: sketch lr: 0.000696
batch 255 loss: 0.24528 acc: 0.92318 | v_loss: 0.82887 v_acc: 0.85840 |  iteration: 3985 teacher: 0 stage: sketch lr: 0.000696
batch 256 loss: 0.26797 acc: 0.91699 | v_loss: 0.54149 v_acc: 0.88704 |  iteration: 3986 teacher: 1 stage: sketch lr: 0.000696
batch 257 loss: 0.31626 acc: 0.90885 | v_loss: 0.67064 v_acc: 0.86816 |  iteration: 3987 teacher: 0 stage: sket

batch 315 loss: 0.31654 acc: 0.91960 | v_loss: 0.67173 v_acc: 0.87533 |  iteration: 4045 teacher: 0 stage: sketch lr: 0.000695
batch 316 loss: 0.22299 acc: 0.93717 | v_loss: 0.57743 v_acc: 0.86393 |  iteration: 4046 teacher: 0 stage: sketch lr: 0.000695
batch 317 loss: 0.18744 acc: 0.94010 | v_loss: 0.44721 v_acc: 0.90299 |  iteration: 4047 teacher: 0 stage: sketch lr: 0.000695
batch 318 loss: 0.21309 acc: 0.93392 | v_loss: 0.77553 v_acc: 0.83561 |  iteration: 4048 teacher: 1 stage: sketch lr: 0.000695
batch 319 loss: 0.31415 acc: 0.90430 | v_loss: 0.58258 v_acc: 0.87891 |  iteration: 4049 teacher: 0 stage: sketch lr: 0.000695
batch 320 loss: 0.31317 acc: 0.90430 | v_loss: 0.54696 v_acc: 0.87858 |  iteration: 4050 teacher: 1 stage: sketch lr: 0.000694
batch 321 loss: 0.26633 acc: 0.92155 | v_loss: 0.53124 v_acc: 0.87467 |  iteration: 4051 teacher: 0 stage: sketch lr: 0.000694
batch 322 loss: 0.30192 acc: 0.91471 | v_loss: 0.49145 v_acc: 0.89453 |  iteration: 4052 teacher: 0 stage: sket

batch 380 loss: 0.26098 acc: 0.92676 | v_loss: 0.63295 v_acc: 0.88053 |  iteration: 4110 teacher: 0 stage: sketch lr: 0.000689
batch 381 loss: 0.27520 acc: 0.93001 | v_loss: 0.50951 v_acc: 0.89811 |  iteration: 4111 teacher: 0 stage: sketch lr: 0.000689
batch 382 loss: 0.24697 acc: 0.93685 | v_loss: 0.41982 v_acc: 0.90820 |  iteration: 4112 teacher: 0 stage: sketch lr: 0.000689
batch 383 loss: 0.26750 acc: 0.93001 | v_loss: 0.52890 v_acc: 0.88574 |  iteration: 4113 teacher: 1 stage: sketch lr: 0.000689
batch 384 loss: 0.31957 acc: 0.90397 | v_loss: 0.60369 v_acc: 0.86784 |  iteration: 4114 teacher: 1 stage: sketch lr: 0.000689
batch 385 loss: 0.23158 acc: 0.93164 | v_loss: 0.44058 v_acc: 0.89746 |  iteration: 4115 teacher: 0 stage: sketch lr: 0.000689
batch 386 loss: 0.31827 acc: 0.91341 | v_loss: 0.60823 v_acc: 0.86035 |  iteration: 4116 teacher: 1 stage: sketch lr: 0.000689
batch 387 loss: 0.30228 acc: 0.91536 | v_loss: 0.38797 v_acc: 0.90039 |  iteration: 4117 teacher: 1 stage: sket

batch 445 loss: 0.29535 acc: 0.91634 | v_loss: 0.49833 v_acc: 0.88477 |  iteration: 4175 teacher: 0 stage: sketch lr: 0.000684
batch 446 loss: 0.31388 acc: 0.91146 | v_loss: 0.41190 v_acc: 0.90039 |  iteration: 4176 teacher: 0 stage: sketch lr: 0.000684
batch 447 loss: 0.25517 acc: 0.92806 | v_loss: 0.45753 v_acc: 0.90430 |  iteration: 4177 teacher: 1 stage: sketch lr: 0.000684
batch 448 loss: 0.27202 acc: 0.92383 | v_loss: 0.60311 v_acc: 0.87077 |  iteration: 4178 teacher: 1 stage: sketch lr: 0.000684
batch 449 loss: 0.23233 acc: 0.93066 | v_loss: 0.37788 v_acc: 0.91960 |  iteration: 4179 teacher: 1 stage: sketch lr: 0.000684
batch 450 loss: 0.24281 acc: 0.92871 | v_loss: 0.87699 v_acc: 0.82389 |  iteration: 4180 teacher: 1 stage: sketch lr: 0.000684
batch 451 loss: 0.32508 acc: 0.90853 | v_loss: 0.61009 v_acc: 0.87760 |  iteration: 4181 teacher: 1 stage: sketch lr: 0.000683
batch 452 loss: 0.33209 acc: 0.90560 | v_loss: 0.70101 v_acc: 0.86426 |  iteration: 4182 teacher: 1 stage: sket

batch 510 loss: 0.36176 acc: 0.90007 | v_loss: 0.41661 v_acc: 0.89746 |  iteration: 4240 teacher: 0 stage: sketch lr: 0.000679
batch 511 loss: 0.32093 acc: 0.90430 | v_loss: 0.60450 v_acc: 0.86393 |  iteration: 4241 teacher: 1 stage: sketch lr: 0.000679
batch 512 loss: 0.34974 acc: 0.90397 | v_loss: 0.50507 v_acc: 0.87728 |  iteration: 4242 teacher: 1 stage: sketch lr: 0.000679
batch 513 loss: 0.34304 acc: 0.90104 | v_loss: 0.89332 v_acc: 0.81934 |  iteration: 4243 teacher: 0 stage: sketch lr: 0.000678
batch 514 loss: 0.50256 acc: 0.86686 | v_loss: 0.38113 v_acc: 0.89746 |  iteration: 4244 teacher: 1 stage: sketch lr: 0.000678
batch 515 loss: 0.38722 acc: 0.89388 | v_loss: 0.29714 v_acc: 0.92350 |  iteration: 4245 teacher: 0 stage: sketch lr: 0.000678
batch 516 loss: 0.23534 acc: 0.92741 | v_loss: 0.53631 v_acc: 0.88346 |  iteration: 4246 teacher: 0 stage: sketch lr: 0.000678
batch 517 loss: 0.30107 acc: 0.91243 | v_loss: 0.56778 v_acc: 0.87891 |  iteration: 4247 teacher: 0 stage: sket

batch 575 loss: 0.31324 acc: 0.90755 | v_loss: 0.45692 v_acc: 0.89290 |  iteration: 4305 teacher: 0 stage: sketch lr: 0.000674
batch 576 loss: 0.44133 acc: 0.87988 | v_loss: 0.44223 v_acc: 0.88672 |  iteration: 4306 teacher: 1 stage: sketch lr: 0.000673
batch 577 loss: 0.21292 acc: 0.93327 | v_loss: 0.43093 v_acc: 0.89290 |  iteration: 4307 teacher: 0 stage: sketch lr: 0.000673
batch 578 loss: 0.26582 acc: 0.91634 | v_loss: 0.74394 v_acc: 0.84049 |  iteration: 4308 teacher: 0 stage: sketch lr: 0.000673
batch 579 loss: 0.31522 acc: 0.90495 | v_loss: 0.50521 v_acc: 0.88965 |  iteration: 4309 teacher: 1 stage: sketch lr: 0.000673
batch 580 loss: 0.28628 acc: 0.91960 | v_loss: 0.53614 v_acc: 0.88021 |  iteration: 4310 teacher: 1 stage: sketch lr: 0.000673
batch 581 loss: 0.32595 acc: 0.91309 | v_loss: 0.68185 v_acc: 0.85612 |  iteration: 4311 teacher: 0 stage: sketch lr: 0.000673
batch 582 loss: 0.21729 acc: 0.93783 | v_loss: 0.46220 v_acc: 0.89876 |  iteration: 4312 teacher: 1 stage: sket

batch 640 loss: 0.36834 acc: 0.90007 | v_loss: 0.50078 v_acc: 0.88737 |  iteration: 4370 teacher: 1 stage: sketch lr: 0.000669
batch 641 loss: 0.32474 acc: 0.90560 | v_loss: 0.43673 v_acc: 0.88835 |  iteration: 4371 teacher: 0 stage: sketch lr: 0.000668
batch 642 loss: 0.32686 acc: 0.90299 | v_loss: 0.39053 v_acc: 0.90820 |  iteration: 4372 teacher: 0 stage: sketch lr: 0.000668
batch 643 loss: 0.23114 acc: 0.93620 | v_loss: 0.60557 v_acc: 0.87142 |  iteration: 4373 teacher: 1 stage: sketch lr: 0.000668
batch 644 loss: 0.29347 acc: 0.91016 | v_loss: 0.54784 v_acc: 0.87240 |  iteration: 4374 teacher: 0 stage: sketch lr: 0.000668
batch 645 loss: 0.29503 acc: 0.91764 | v_loss: 0.60267 v_acc: 0.86458 |  iteration: 4375 teacher: 0 stage: sketch lr: 0.000668
batch 646 loss: 0.24892 acc: 0.93197 | v_loss: 0.79166 v_acc: 0.84961 |  iteration: 4376 teacher: 0 stage: sketch lr: 0.000668
batch 647 loss: 0.26766 acc: 0.92448 | v_loss: 0.42756 v_acc: 0.90658 |  iteration: 4377 teacher: 0 stage: sket

batch 705 loss: 0.23700 acc: 0.93424 | v_loss: 0.52397 v_acc: 0.88900 |  iteration: 4435 teacher: 0 stage: sketch lr: 0.000664
batch 706 loss: 0.18132 acc: 0.94434 | v_loss: 0.38426 v_acc: 0.90495 |  iteration: 4436 teacher: 1 stage: sketch lr: 0.000664
batch 707 loss: 0.31950 acc: 0.91569 | v_loss: 0.31354 v_acc: 0.92741 |  iteration: 4437 teacher: 0 stage: sketch lr: 0.000663
batch 708 loss: 0.35815 acc: 0.90625 | v_loss: 0.46009 v_acc: 0.90234 |  iteration: 4438 teacher: 0 stage: sketch lr: 0.000663
batch 709 loss: 0.39735 acc: 0.89160 | v_loss: 0.48277 v_acc: 0.89258 |  iteration: 4439 teacher: 1 stage: sketch lr: 0.000663
batch 710 loss: 0.36739 acc: 0.90495 | v_loss: 0.82033 v_acc: 0.85579 |  iteration: 4440 teacher: 1 stage: sketch lr: 0.000663
batch 711 loss: 0.29226 acc: 0.91146 | v_loss: 0.55767 v_acc: 0.87923 |  iteration: 4441 teacher: 0 stage: sketch lr: 0.000663
batch 712 loss: 0.28588 acc: 0.91764 | v_loss: 0.65955 v_acc: 0.86100 |  iteration: 4442 teacher: 0 stage: sket

batch 770 loss: 0.34061 acc: 0.90202 | v_loss: 0.64404 v_acc: 0.87923 |  iteration: 4500 teacher: 0 stage: sketch lr: 0.000659
batch 771 loss: 0.30034 acc: 0.91113 | v_loss: 0.56134 v_acc: 0.86426 |  iteration: 4501 teacher: 1 stage: sketch lr: 0.000659
batch 772 loss: 0.19656 acc: 0.93848 | v_loss: 0.44929 v_acc: 0.90495 |  iteration: 4502 teacher: 0 stage: sketch lr: 0.000659
batch 773 loss: 0.36396 acc: 0.90072 | v_loss: 0.78133 v_acc: 0.84440 |  iteration: 4503 teacher: 1 stage: sketch lr: 0.000659
batch 774 loss: 0.26560 acc: 0.92513 | v_loss: 0.57040 v_acc: 0.88737 |  iteration: 4504 teacher: 1 stage: sketch lr: 0.000659
batch 775 loss: 0.26623 acc: 0.92025 | v_loss: 0.56323 v_acc: 0.87402 |  iteration: 4505 teacher: 0 stage: sketch lr: 0.000658
batch 776 loss: 0.30652 acc: 0.91536 | v_loss: 0.52872 v_acc: 0.88672 |  iteration: 4506 teacher: 0 stage: sketch lr: 0.000658
batch 777 loss: 0.29076 acc: 0.91862 | v_loss: 0.49309 v_acc: 0.88802 |  iteration: 4507 teacher: 0 stage: sket

batch 835 loss: 0.27454 acc: 0.92285 | v_loss: 0.63190 v_acc: 0.87435 |  iteration: 4565 teacher: 0 stage: sketch lr: 0.000654
batch 836 loss: 0.25326 acc: 0.92643 | v_loss: 0.55834 v_acc: 0.88965 |  iteration: 4566 teacher: 1 stage: sketch lr: 0.000654
batch 837 loss: 0.32172 acc: 0.91504 | v_loss: 0.41406 v_acc: 0.90918 |  iteration: 4567 teacher: 1 stage: sketch lr: 0.000654
batch 838 loss: 0.26702 acc: 0.92122 | v_loss: 0.51396 v_acc: 0.88542 |  iteration: 4568 teacher: 0 stage: sketch lr: 0.000654
batch 839 loss: 0.26469 acc: 0.91602 | v_loss: 0.57514 v_acc: 0.87598 |  iteration: 4569 teacher: 1 stage: sketch lr: 0.000654
batch 840 loss: 0.37842 acc: 0.89746 | v_loss: 0.45803 v_acc: 0.90104 |  iteration: 4570 teacher: 1 stage: sketch lr: 0.000654
batch 841 loss: 0.23060 acc: 0.92741 | v_loss: 0.62584 v_acc: 0.87142 |  iteration: 4571 teacher: 1 stage: sketch lr: 0.000654
batch 842 loss: 0.28275 acc: 0.92546 | v_loss: 0.40563 v_acc: 0.90625 |  iteration: 4572 teacher: 0 stage: sket

batch 900 loss: 0.29014 acc: 0.91634 | v_loss: 0.48744 v_acc: 0.89355 |  iteration: 4630 teacher: 0 stage: sketch lr: 0.000649
batch 901 loss: 0.22915 acc: 0.93457 | v_loss: 0.44423 v_acc: 0.90853 |  iteration: 4631 teacher: 1 stage: sketch lr: 0.000649
batch 902 loss: 0.23388 acc: 0.93229 | v_loss: 0.45898 v_acc: 0.90332 |  iteration: 4632 teacher: 1 stage: sketch lr: 0.000649
batch 903 loss: 0.20643 acc: 0.93587 | v_loss: 0.61730 v_acc: 0.86816 |  iteration: 4633 teacher: 1 stage: sketch lr: 0.000649
batch 904 loss: 0.29030 acc: 0.91146 | v_loss: 0.35462 v_acc: 0.92253 |  iteration: 4634 teacher: 1 stage: sketch lr: 0.000649
batch 905 loss: 0.26009 acc: 0.92741 | v_loss: 0.99606 v_acc: 0.83171 |  iteration: 4635 teacher: 1 stage: sketch lr: 0.000649
batch 906 loss: 0.23206 acc: 0.92773 | v_loss: 0.64871 v_acc: 0.87500 |  iteration: 4636 teacher: 0 stage: sketch lr: 0.000649
batch 907 loss: 0.25271 acc: 0.93262 | v_loss: 0.72989 v_acc: 0.86328 |  iteration: 4637 teacher: 1 stage: sket

batch 965 loss: 0.28449 acc: 0.91895 | v_loss: 0.39110 v_acc: 0.90918 |  iteration: 4695 teacher: 0 stage: sketch lr: 0.000645
batch 966 loss: 0.34544 acc: 0.90527 | v_loss: 0.54885 v_acc: 0.88184 |  iteration: 4696 teacher: 0 stage: sketch lr: 0.000645
batch 967 loss: 0.28878 acc: 0.92448 | v_loss: 0.49211 v_acc: 0.87663 |  iteration: 4697 teacher: 1 stage: sketch lr: 0.000645
batch 968 loss: 0.27891 acc: 0.92253 | v_loss: 0.81658 v_acc: 0.82878 |  iteration: 4698 teacher: 1 stage: sketch lr: 0.000645
batch 969 loss: 0.28855 acc: 0.90430 | v_loss: 0.37197 v_acc: 0.90853 |  iteration: 4699 teacher: 0 stage: sketch lr: 0.000645
batch 970 loss: 0.22363 acc: 0.93392 | v_loss: 0.33115 v_acc: 0.92025 |  iteration: 4700 teacher: 1 stage: sketch lr: 0.000645
batch 971 loss: 0.23074 acc: 0.93424 | v_loss: 0.57264 v_acc: 0.87923 |  iteration: 4701 teacher: 1 stage: sketch lr: 0.000645
batch 972 loss: 0.32885 acc: 0.90267 | v_loss: 0.57867 v_acc: 0.87923 |  iteration: 4702 teacher: 0 stage: sket

batch 1030 loss: 0.25591 acc: 0.91960 | v_loss: 0.43741 v_acc: 0.89876 |  iteration: 4760 teacher: 1 stage: sketch lr: 0.000641
batch 1031 loss: 0.28520 acc: 0.92318 | v_loss: 0.46771 v_acc: 0.89388 |  iteration: 4761 teacher: 0 stage: sketch lr: 0.000640
batch 1032 loss: 0.33450 acc: 0.90267 | v_loss: 0.41668 v_acc: 0.90072 |  iteration: 4762 teacher: 0 stage: sketch lr: 0.000640
batch 1033 loss: 0.29962 acc: 0.91341 | v_loss: 0.73855 v_acc: 0.85482 |  iteration: 4763 teacher: 0 stage: sketch lr: 0.000640
batch 1034 loss: 0.27593 acc: 0.91178 | v_loss: 0.53004 v_acc: 0.89518 |  iteration: 4764 teacher: 0 stage: sketch lr: 0.000640
batch 1035 loss: 0.32367 acc: 0.90690 | v_loss: 0.56148 v_acc: 0.88346 |  iteration: 4765 teacher: 0 stage: sketch lr: 0.000640
batch 1036 loss: 0.20942 acc: 0.93490 | v_loss: 0.65989 v_acc: 0.85514 |  iteration: 4766 teacher: 0 stage: sketch lr: 0.000640
batch 1037 loss: 0.25734 acc: 0.92546 | v_loss: 0.45379 v_acc: 0.89388 |  iteration: 4767 teacher: 0 sta

batch 1095 loss: 0.21613 acc: 0.93359 | v_loss: 0.46253 v_acc: 0.90658 |  iteration: 4825 teacher: 1 stage: sketch lr: 0.000636
batch 1096 loss: 0.26785 acc: 0.92057 | v_loss: 0.45770 v_acc: 0.89616 |  iteration: 4826 teacher: 1 stage: sketch lr: 0.000636
batch 1097 loss: 0.23824 acc: 0.92969 | v_loss: 0.33349 v_acc: 0.92090 |  iteration: 4827 teacher: 0 stage: sketch lr: 0.000636
batch 1098 loss: 0.24984 acc: 0.92708 | v_loss: 0.58163 v_acc: 0.86849 |  iteration: 4828 teacher: 1 stage: sketch lr: 0.000636
batch 1099 loss: 0.20793 acc: 0.94141 | v_loss: 0.52003 v_acc: 0.86589 |  iteration: 4829 teacher: 1 stage: sketch lr: 0.000636
batch 1100 loss: 0.33100 acc: 0.90625 | v_loss: 0.57757 v_acc: 0.85905 |  iteration: 4830 teacher: 0 stage: sketch lr: 0.000636
batch 1101 loss: 0.27560 acc: 0.92318 | v_loss: 0.81047 v_acc: 0.84375 |  iteration: 4831 teacher: 0 stage: sketch lr: 0.000636
batch 1102 loss: 0.42126 acc: 0.88021 | v_loss: 0.44835 v_acc: 0.91016 |  iteration: 4832 teacher: 0 sta

batch 1160 loss: 0.35854 acc: 0.89258 | v_loss: 0.48118 v_acc: 0.89421 |  iteration: 4890 teacher: 0 stage: sketch lr: 0.000632
batch 1161 loss: 0.25951 acc: 0.92643 | v_loss: 0.37228 v_acc: 0.91374 |  iteration: 4891 teacher: 0 stage: sketch lr: 0.000632
batch 1162 loss: 0.23950 acc: 0.92383 | v_loss: 0.31869 v_acc: 0.92155 |  iteration: 4892 teacher: 0 stage: sketch lr: 0.000632
batch 1163 loss: 0.24159 acc: 0.92546 | v_loss: 0.50734 v_acc: 0.90625 |  iteration: 4893 teacher: 0 stage: sketch lr: 0.000632
batch 1164 loss: 0.25033 acc: 0.93229 | v_loss: 0.54474 v_acc: 0.88802 |  iteration: 4894 teacher: 0 stage: sketch lr: 0.000632
batch 1165 loss: 0.24312 acc: 0.93132 | v_loss: 0.84754 v_acc: 0.86165 |  iteration: 4895 teacher: 1 stage: sketch lr: 0.000632
batch 1166 loss: 0.30642 acc: 0.91406 | v_loss: 0.54332 v_acc: 0.88379 |  iteration: 4896 teacher: 1 stage: sketch lr: 0.000632
batch 1167 loss: 0.26435 acc: 0.92480 | v_loss: 0.66864 v_acc: 0.86589 |  iteration: 4897 teacher: 1 sta

batch 1225 loss: 0.24214 acc: 0.92448 | v_loss: 0.62933 v_acc: 0.88086 |  iteration: 4955 teacher: 0 stage: sketch lr: 0.000628
batch 1226 loss: 0.20120 acc: 0.94206 | v_loss: 0.56898 v_acc: 0.86426 |  iteration: 4956 teacher: 1 stage: sketch lr: 0.000628
batch 1227 loss: 0.24898 acc: 0.92708 | v_loss: 0.42519 v_acc: 0.90592 |  iteration: 4957 teacher: 0 stage: sketch lr: 0.000628
batch 1228 loss: 0.23826 acc: 0.92480 | v_loss: 0.78206 v_acc: 0.84896 |  iteration: 4958 teacher: 0 stage: sketch lr: 0.000628
batch 1229 loss: 0.26447 acc: 0.92285 | v_loss: 0.56080 v_acc: 0.89160 |  iteration: 4959 teacher: 1 stage: sketch lr: 0.000628
batch 1230 loss: 0.25910 acc: 0.92090 | v_loss: 0.59308 v_acc: 0.87500 |  iteration: 4960 teacher: 1 stage: sketch lr: 0.000628
batch 1231 loss: 0.24352 acc: 0.93099 | v_loss: 0.52395 v_acc: 0.88509 |  iteration: 4961 teacher: 0 stage: sketch lr: 0.000627
batch 1232 loss: 0.24308 acc: 0.92643 | v_loss: 0.42604 v_acc: 0.89844 |  iteration: 4962 teacher: 1 sta

batch 46 loss: 0.22728 acc: 0.92741 | v_loss: 0.59685 v_acc: 0.88249 |  iteration: 5019 teacher: 0 stage: sketch lr: 0.000624
batch 47 loss: 0.21854 acc: 0.93945 | v_loss: 0.51678 v_acc: 0.88574 |  iteration: 5020 teacher: 0 stage: sketch lr: 0.000624
batch 48 loss: 0.29833 acc: 0.91764 | v_loss: 0.52070 v_acc: 0.88184 |  iteration: 5021 teacher: 0 stage: sketch lr: 0.000624
batch 49 loss: 0.25332 acc: 0.93262 | v_loss: 0.46383 v_acc: 0.89290 |  iteration: 5022 teacher: 1 stage: sketch lr: 0.000624
batch 50 loss: 0.24833 acc: 0.92936 | v_loss: 0.46022 v_acc: 0.89779 |  iteration: 5023 teacher: 1 stage: sketch lr: 0.000624
batch 51 loss: 0.25437 acc: 0.92285 | v_loss: 0.51749 v_acc: 0.89095 |  iteration: 5024 teacher: 0 stage: sketch lr: 0.000624
batch 52 loss: 0.23053 acc: 0.92904 | v_loss: 0.62956 v_acc: 0.85677 |  iteration: 5025 teacher: 1 stage: sketch lr: 0.000623
batch 53 loss: 0.22000 acc: 0.93164 | v_loss: 0.55391 v_acc: 0.89160 |  iteration: 5026 teacher: 1 stage: sketch lr: 0

batch 111 loss: 0.22727 acc: 0.92871 | v_loss: 0.59739 v_acc: 0.87663 |  iteration: 5084 teacher: 1 stage: sketch lr: 0.000620
batch 112 loss: 0.25917 acc: 0.92188 | v_loss: 0.43255 v_acc: 0.90169 |  iteration: 5085 teacher: 1 stage: sketch lr: 0.000620
batch 113 loss: 0.34714 acc: 0.90169 | v_loss: 0.62873 v_acc: 0.86849 |  iteration: 5086 teacher: 0 stage: sketch lr: 0.000620
batch 114 loss: 0.28015 acc: 0.91471 | v_loss: 0.38168 v_acc: 0.90430 |  iteration: 5087 teacher: 1 stage: sketch lr: 0.000620
batch 115 loss: 0.32431 acc: 0.90690 | v_loss: 0.98889 v_acc: 0.80924 |  iteration: 5088 teacher: 0 stage: sketch lr: 0.000620
batch 116 loss: 0.30599 acc: 0.91113 | v_loss: 0.59513 v_acc: 0.87858 |  iteration: 5089 teacher: 0 stage: sketch lr: 0.000620
batch 117 loss: 0.26344 acc: 0.93164 | v_loss: 0.71875 v_acc: 0.85938 |  iteration: 5090 teacher: 1 stage: sketch lr: 0.000619
batch 118 loss: 0.29545 acc: 0.91081 | v_loss: 0.44497 v_acc: 0.89844 |  iteration: 5091 teacher: 1 stage: sket

batch 176 loss: 0.24390 acc: 0.93294 | v_loss: 0.37300 v_acc: 0.91764 |  iteration: 5149 teacher: 1 stage: sketch lr: 0.000616
batch 177 loss: 0.22020 acc: 0.93392 | v_loss: 0.95036 v_acc: 0.83008 |  iteration: 5150 teacher: 0 stage: sketch lr: 0.000616
batch 178 loss: 0.28904 acc: 0.91829 | v_loss: 0.66424 v_acc: 0.87793 |  iteration: 5151 teacher: 1 stage: sketch lr: 0.000616
batch 179 loss: 0.27939 acc: 0.92285 | v_loss: 0.70892 v_acc: 0.86361 |  iteration: 5152 teacher: 0 stage: sketch lr: 0.000616
batch 180 loss: 0.26850 acc: 0.92415 | v_loss: 0.51947 v_acc: 0.89062 |  iteration: 5153 teacher: 1 stage: sketch lr: 0.000616
batch 181 loss: 0.26113 acc: 0.92513 | v_loss: 0.49152 v_acc: 0.88802 |  iteration: 5154 teacher: 1 stage: sketch lr: 0.000616
batch 182 loss: 0.24252 acc: 0.92057 | v_loss: 0.56226 v_acc: 0.88835 |  iteration: 5155 teacher: 0 stage: sketch lr: 0.000616
batch 183 loss: 0.27646 acc: 0.91797 | v_loss: 0.50726 v_acc: 0.89681 |  iteration: 5156 teacher: 0 stage: sket

batch 241 loss: 0.28752 acc: 0.91536 | v_loss: 0.36721 v_acc: 0.90625 |  iteration: 5214 teacher: 0 stage: sketch lr: 0.000612
batch 242 loss: 0.29826 acc: 0.91764 | v_loss: 0.30956 v_acc: 0.93001 |  iteration: 5215 teacher: 1 stage: sketch lr: 0.000612
batch 243 loss: 0.33889 acc: 0.90365 | v_loss: 0.61967 v_acc: 0.87858 |  iteration: 5216 teacher: 0 stage: sketch lr: 0.000612
batch 244 loss: 0.30793 acc: 0.90885 | v_loss: 0.55285 v_acc: 0.88118 |  iteration: 5217 teacher: 0 stage: sketch lr: 0.000612
batch 245 loss: 0.35522 acc: 0.89290 | v_loss: 0.67157 v_acc: 0.85579 |  iteration: 5218 teacher: 1 stage: sketch lr: 0.000612
batch 246 loss: 0.25952 acc: 0.92285 | v_loss: 0.44416 v_acc: 0.89193 |  iteration: 5219 teacher: 1 stage: sketch lr: 0.000612
batch 247 loss: 0.21175 acc: 0.94238 | v_loss: 0.54401 v_acc: 0.86589 |  iteration: 5220 teacher: 1 stage: sketch lr: 0.000612
batch 248 loss: 0.31938 acc: 0.90658 | v_loss: 0.64426 v_acc: 0.85482 |  iteration: 5221 teacher: 0 stage: sket

batch 306 loss: 0.28893 acc: 0.91536 | v_loss: 0.54810 v_acc: 0.89290 |  iteration: 5279 teacher: 1 stage: sketch lr: 0.000608
batch 307 loss: 0.26864 acc: 0.92220 | v_loss: 0.57269 v_acc: 0.87207 |  iteration: 5280 teacher: 1 stage: sketch lr: 0.000608
batch 308 loss: 0.24832 acc: 0.92773 | v_loss: 0.67265 v_acc: 0.85059 |  iteration: 5281 teacher: 1 stage: sketch lr: 0.000608
batch 309 loss: 0.25051 acc: 0.92936 | v_loss: 0.44986 v_acc: 0.89583 |  iteration: 5282 teacher: 1 stage: sketch lr: 0.000608
batch 310 loss: 0.31990 acc: 0.90918 | v_loss: 0.52814 v_acc: 0.86816 |  iteration: 5283 teacher: 0 stage: sketch lr: 0.000608
batch 311 loss: 0.24987 acc: 0.92936 | v_loss: 0.52370 v_acc: 0.87565 |  iteration: 5284 teacher: 0 stage: sketch lr: 0.000608
batch 312 loss: 0.24658 acc: 0.92546 | v_loss: 0.81166 v_acc: 0.84733 |  iteration: 5285 teacher: 1 stage: sketch lr: 0.000608
batch 313 loss: 0.25237 acc: 0.92839 | v_loss: 0.58248 v_acc: 0.88249 |  iteration: 5286 teacher: 0 stage: sket

batch 371 loss: 0.21765 acc: 0.93164 | v_loss: 0.56113 v_acc: 0.86100 |  iteration: 5344 teacher: 0 stage: sketch lr: 0.000605
batch 372 loss: 0.26915 acc: 0.92220 | v_loss: 0.59535 v_acc: 0.87044 |  iteration: 5345 teacher: 1 stage: sketch lr: 0.000604
batch 373 loss: 0.28678 acc: 0.91797 | v_loss: 0.85749 v_acc: 0.84408 |  iteration: 5346 teacher: 0 stage: sketch lr: 0.000604
batch 374 loss: 0.22218 acc: 0.94271 | v_loss: 0.43468 v_acc: 0.90788 |  iteration: 5347 teacher: 0 stage: sketch lr: 0.000604
batch 375 loss: 0.33715 acc: 0.90755 | v_loss: 0.59784 v_acc: 0.88281 |  iteration: 5348 teacher: 0 stage: sketch lr: 0.000604
batch 376 loss: 0.22943 acc: 0.93099 | v_loss: 0.56873 v_acc: 0.89128 |  iteration: 5349 teacher: 0 stage: sketch lr: 0.000604
batch 377 loss: 0.24864 acc: 0.92969 | v_loss: 0.42505 v_acc: 0.90560 |  iteration: 5350 teacher: 1 stage: sketch lr: 0.000604
batch 378 loss: 0.29832 acc: 0.91016 | v_loss: 1.16211 v_acc: 0.80078 |  iteration: 5351 teacher: 0 stage: sket

batch 436 loss: 0.31716 acc: 0.91276 | v_loss: 0.51832 v_acc: 0.88118 |  iteration: 5409 teacher: 1 stage: sketch lr: 0.000601
batch 437 loss: 0.26641 acc: 0.92611 | v_loss: 0.79679 v_acc: 0.86100 |  iteration: 5410 teacher: 1 stage: sketch lr: 0.000601
batch 438 loss: 0.26955 acc: 0.92188 | v_loss: 0.51167 v_acc: 0.88672 |  iteration: 5411 teacher: 1 stage: sketch lr: 0.000601
batch 439 loss: 0.23775 acc: 0.92480 | v_loss: 0.68300 v_acc: 0.86458 |  iteration: 5412 teacher: 0 stage: sketch lr: 0.000601
batch 440 loss: 0.26543 acc: 0.92448 | v_loss: 0.44764 v_acc: 0.89779 |  iteration: 5413 teacher: 0 stage: sketch lr: 0.000601
batch 441 loss: 0.21321 acc: 0.93717 | v_loss: 0.57717 v_acc: 0.88346 |  iteration: 5414 teacher: 1 stage: sketch lr: 0.000601
batch 442 loss: 0.28954 acc: 0.92057 | v_loss: 0.79674 v_acc: 0.84115 |  iteration: 5415 teacher: 1 stage: sketch lr: 0.000601
batch 443 loss: 0.30297 acc: 0.91667 | v_loss: 0.54249 v_acc: 0.89290 |  iteration: 5416 teacher: 1 stage: sket

batch 501 loss: 0.25997 acc: 0.93359 | v_loss: 0.62605 v_acc: 0.88477 |  iteration: 5474 teacher: 1 stage: sketch lr: 0.000597
batch 502 loss: 0.37509 acc: 0.89290 | v_loss: 0.57015 v_acc: 0.87467 |  iteration: 5475 teacher: 0 stage: sketch lr: 0.000597
batch 503 loss: 0.26568 acc: 0.93066 | v_loss: 0.57587 v_acc: 0.87826 |  iteration: 5476 teacher: 0 stage: sketch lr: 0.000597
batch 504 loss: 0.23718 acc: 0.93783 | v_loss: 0.49301 v_acc: 0.88867 |  iteration: 5477 teacher: 1 stage: sketch lr: 0.000597
batch 505 loss: 0.30520 acc: 0.92090 | v_loss: 0.46309 v_acc: 0.89258 |  iteration: 5478 teacher: 1 stage: sketch lr: 0.000597
batch 506 loss: 0.33166 acc: 0.90983 | v_loss: 0.49621 v_acc: 0.88574 |  iteration: 5479 teacher: 0 stage: sketch lr: 0.000597
batch 507 loss: 0.22321 acc: 0.92578 | v_loss: 0.57042 v_acc: 0.85938 |  iteration: 5480 teacher: 0 stage: sketch lr: 0.000597
batch 508 loss: 0.31113 acc: 0.91113 | v_loss: 0.53423 v_acc: 0.88997 |  iteration: 5481 teacher: 0 stage: sket

batch 566 loss: 0.37335 acc: 0.89160 | v_loss: 0.60357 v_acc: 0.87467 |  iteration: 5539 teacher: 1 stage: sketch lr: 0.000594
batch 567 loss: 0.22273 acc: 0.93490 | v_loss: 0.42876 v_acc: 0.90820 |  iteration: 5540 teacher: 1 stage: sketch lr: 0.000594
batch 568 loss: 0.26998 acc: 0.91732 | v_loss: 0.63532 v_acc: 0.87142 |  iteration: 5541 teacher: 1 stage: sketch lr: 0.000594
batch 569 loss: 0.27384 acc: 0.91536 | v_loss: 0.37001 v_acc: 0.90332 |  iteration: 5542 teacher: 0 stage: sketch lr: 0.000594
batch 570 loss: 0.31069 acc: 0.90365 | v_loss: 0.99485 v_acc: 0.81348 |  iteration: 5543 teacher: 1 stage: sketch lr: 0.000594
batch 571 loss: 0.24590 acc: 0.92155 | v_loss: 0.54764 v_acc: 0.88346 |  iteration: 5544 teacher: 0 stage: sketch lr: 0.000594
batch 572 loss: 0.25684 acc: 0.93132 | v_loss: 0.69781 v_acc: 0.86003 |  iteration: 5545 teacher: 1 stage: sketch lr: 0.000593
batch 573 loss: 0.31373 acc: 0.90885 | v_loss: 0.44094 v_acc: 0.89583 |  iteration: 5546 teacher: 0 stage: sket

batch 631 loss: 0.16185 acc: 0.94889 | v_loss: 0.36198 v_acc: 0.91732 |  iteration: 5604 teacher: 1 stage: sketch lr: 0.000590
batch 632 loss: 0.34143 acc: 0.90625 | v_loss: 0.93441 v_acc: 0.83040 |  iteration: 5605 teacher: 1 stage: sketch lr: 0.000590
batch 633 loss: 0.20471 acc: 0.93848 | v_loss: 0.59896 v_acc: 0.87923 |  iteration: 5606 teacher: 0 stage: sketch lr: 0.000590
batch 634 loss: 0.32543 acc: 0.90365 | v_loss: 0.70277 v_acc: 0.86068 |  iteration: 5607 teacher: 1 stage: sketch lr: 0.000590
batch 635 loss: 0.19885 acc: 0.93978 | v_loss: 0.52928 v_acc: 0.88411 |  iteration: 5608 teacher: 1 stage: sketch lr: 0.000590
batch 636 loss: 0.27822 acc: 0.92155 | v_loss: 0.52292 v_acc: 0.88346 |  iteration: 5609 teacher: 1 stage: sketch lr: 0.000590
batch 637 loss: 0.22985 acc: 0.93066 | v_loss: 0.61447 v_acc: 0.87858 |  iteration: 5610 teacher: 0 stage: sketch lr: 0.000590
batch 638 loss: 0.28193 acc: 0.92090 | v_loss: 0.51797 v_acc: 0.88509 |  iteration: 5611 teacher: 1 stage: sket

batch 696 loss: 0.30152 acc: 0.91569 | v_loss: 0.34612 v_acc: 0.90723 |  iteration: 5669 teacher: 1 stage: sketch lr: 0.000587
batch 697 loss: 0.30736 acc: 0.91471 | v_loss: 0.31698 v_acc: 0.92350 |  iteration: 5670 teacher: 0 stage: sketch lr: 0.000587
batch 698 loss: 0.29970 acc: 0.91927 | v_loss: 0.56243 v_acc: 0.88053 |  iteration: 5671 teacher: 1 stage: sketch lr: 0.000587
batch 699 loss: 0.24749 acc: 0.92122 | v_loss: 0.56121 v_acc: 0.88672 |  iteration: 5672 teacher: 0 stage: sketch lr: 0.000587
batch 700 loss: 0.28182 acc: 0.92611 | v_loss: 0.68992 v_acc: 0.85026 |  iteration: 5673 teacher: 1 stage: sketch lr: 0.000587
batch 701 loss: 0.24259 acc: 0.92936 | v_loss: 0.46003 v_acc: 0.89648 |  iteration: 5674 teacher: 0 stage: sketch lr: 0.000587
batch 702 loss: 0.44239 acc: 0.87240 | v_loss: 0.48502 v_acc: 0.87891 |  iteration: 5675 teacher: 1 stage: sketch lr: 0.000587
batch 703 loss: 0.37400 acc: 0.89974 | v_loss: 0.56646 v_acc: 0.86947 |  iteration: 5676 teacher: 1 stage: sket

batch 761 loss: 0.25394 acc: 0.92643 | v_loss: 0.52052 v_acc: 0.89583 |  iteration: 5734 teacher: 1 stage: sketch lr: 0.000584
batch 762 loss: 0.39374 acc: 0.88835 | v_loss: 0.50656 v_acc: 0.88770 |  iteration: 5735 teacher: 1 stage: sketch lr: 0.000584
batch 763 loss: 0.25933 acc: 0.92448 | v_loss: 0.65953 v_acc: 0.86230 |  iteration: 5736 teacher: 1 stage: sketch lr: 0.000584
batch 764 loss: 0.37274 acc: 0.89909 | v_loss: 0.44919 v_acc: 0.89648 |  iteration: 5737 teacher: 1 stage: sketch lr: 0.000583
batch 765 loss: 0.22725 acc: 0.93164 | v_loss: 0.50331 v_acc: 0.87337 |  iteration: 5738 teacher: 1 stage: sketch lr: 0.000583
batch 766 loss: 0.24386 acc: 0.92546 | v_loss: 0.51767 v_acc: 0.88118 |  iteration: 5739 teacher: 1 stage: sketch lr: 0.000583
batch 767 loss: 0.27251 acc: 0.92057 | v_loss: 0.75950 v_acc: 0.85710 |  iteration: 5740 teacher: 0 stage: sketch lr: 0.000583
batch 768 loss: 0.21724 acc: 0.93620 | v_loss: 0.53308 v_acc: 0.88704 |  iteration: 5741 teacher: 1 stage: sket

batch 826 loss: 0.23652 acc: 0.92708 | v_loss: 0.57081 v_acc: 0.86523 |  iteration: 5799 teacher: 0 stage: sketch lr: 0.000580
batch 827 loss: 0.26702 acc: 0.91667 | v_loss: 0.62371 v_acc: 0.86523 |  iteration: 5800 teacher: 0 stage: sketch lr: 0.000580
batch 828 loss: 0.23801 acc: 0.92285 | v_loss: 0.78666 v_acc: 0.84701 |  iteration: 5801 teacher: 0 stage: sketch lr: 0.000580
batch 829 loss: 0.26886 acc: 0.92741 | v_loss: 0.39484 v_acc: 0.91081 |  iteration: 5802 teacher: 1 stage: sketch lr: 0.000580
batch 830 loss: 0.24125 acc: 0.92350 | v_loss: 0.57098 v_acc: 0.87630 |  iteration: 5803 teacher: 0 stage: sketch lr: 0.000580
batch 831 loss: 0.25423 acc: 0.92415 | v_loss: 0.47894 v_acc: 0.89518 |  iteration: 5804 teacher: 1 stage: sketch lr: 0.000580
batch 832 loss: 0.19676 acc: 0.94271 | v_loss: 0.43023 v_acc: 0.90267 |  iteration: 5805 teacher: 0 stage: sketch lr: 0.000580
batch 833 loss: 0.22273 acc: 0.93359 | v_loss: 1.16193 v_acc: 0.80859 |  iteration: 5806 teacher: 0 stage: sket

batch 891 loss: 0.23673 acc: 0.92448 | v_loss: 0.44951 v_acc: 0.89811 |  iteration: 5864 teacher: 0 stage: sketch lr: 0.000577
batch 892 loss: 0.25141 acc: 0.92448 | v_loss: 0.77395 v_acc: 0.85807 |  iteration: 5865 teacher: 0 stage: sketch lr: 0.000577
batch 893 loss: 0.26646 acc: 0.91992 | v_loss: 0.53307 v_acc: 0.88346 |  iteration: 5866 teacher: 0 stage: sketch lr: 0.000577
batch 894 loss: 0.23281 acc: 0.92676 | v_loss: 0.61337 v_acc: 0.87207 |  iteration: 5867 teacher: 1 stage: sketch lr: 0.000577
batch 895 loss: 0.19767 acc: 0.93815 | v_loss: 0.42265 v_acc: 0.89779 |  iteration: 5868 teacher: 1 stage: sketch lr: 0.000577
batch 896 loss: 0.35865 acc: 0.90234 | v_loss: 0.54889 v_acc: 0.88281 |  iteration: 5869 teacher: 0 stage: sketch lr: 0.000577
batch 897 loss: 0.32131 acc: 0.90918 | v_loss: 0.79354 v_acc: 0.83984 |  iteration: 5870 teacher: 0 stage: sketch lr: 0.000577
batch 898 loss: 0.26256 acc: 0.91960 | v_loss: 0.53496 v_acc: 0.89030 |  iteration: 5871 teacher: 0 stage: sket

batch 956 loss: 0.30826 acc: 0.91374 | v_loss: 0.58440 v_acc: 0.87891 |  iteration: 5929 teacher: 1 stage: sketch lr: 0.000574
batch 957 loss: 0.22949 acc: 0.92578 | v_loss: 0.56262 v_acc: 0.87500 |  iteration: 5930 teacher: 0 stage: sketch lr: 0.000574
batch 958 loss: 0.28413 acc: 0.91895 | v_loss: 0.54834 v_acc: 0.87663 |  iteration: 5931 teacher: 0 stage: sketch lr: 0.000574
batch 959 loss: 0.28647 acc: 0.91699 | v_loss: 0.45743 v_acc: 0.89681 |  iteration: 5932 teacher: 1 stage: sketch lr: 0.000574
batch 960 loss: 0.21554 acc: 0.93392 | v_loss: 0.46960 v_acc: 0.89421 |  iteration: 5933 teacher: 0 stage: sketch lr: 0.000574
batch 961 loss: 0.23363 acc: 0.93229 | v_loss: 0.54932 v_acc: 0.88118 |  iteration: 5934 teacher: 0 stage: sketch lr: 0.000574
batch 962 loss: 0.32359 acc: 0.90397 | v_loss: 0.58516 v_acc: 0.85677 |  iteration: 5935 teacher: 0 stage: sketch lr: 0.000574
batch 963 loss: 0.29777 acc: 0.91146 | v_loss: 0.58340 v_acc: 0.88411 |  iteration: 5936 teacher: 0 stage: sket

batch 1021 loss: 0.33653 acc: 0.89648 | v_loss: 0.60091 v_acc: 0.86882 |  iteration: 5994 teacher: 0 stage: sketch lr: 0.000571
batch 1022 loss: 0.25442 acc: 0.91829 | v_loss: 0.42257 v_acc: 0.90885 |  iteration: 5995 teacher: 0 stage: sketch lr: 0.000571
batch 1023 loss: 0.28680 acc: 0.92025 | v_loss: 0.63408 v_acc: 0.87012 |  iteration: 5996 teacher: 0 stage: sketch lr: 0.000571
batch 1024 loss: 0.27204 acc: 0.93229 | v_loss: 0.36485 v_acc: 0.91113 |  iteration: 5997 teacher: 1 stage: sketch lr: 0.000571
batch 1025 loss: 0.34887 acc: 0.90234 | v_loss: 0.98205 v_acc: 0.82357 |  iteration: 5998 teacher: 1 stage: sketch lr: 0.000571
batch 1026 loss: 0.22785 acc: 0.93457 | v_loss: 0.57157 v_acc: 0.88216 |  iteration: 5999 teacher: 1 stage: sketch lr: 0.000571
batch 1027 loss: 0.23453 acc: 0.93294 | v_loss: 0.69295 v_acc: 0.86426 |  iteration: 6000 teacher: 0 stage: sketch lr: 0.000571
batch 1028 loss: 0.27531 acc: 0.91634 | v_loss: 0.40901 v_acc: 0.90885 |  iteration: 6001 teacher: 0 sta

batch 1086 loss: 0.27017 acc: 0.92057 | v_loss: 0.35697 v_acc: 0.92611 |  iteration: 6059 teacher: 0 stage: sketch lr: 0.000568
batch 1087 loss: 0.27178 acc: 0.92773 | v_loss: 0.95188 v_acc: 0.82161 |  iteration: 6060 teacher: 1 stage: sketch lr: 0.000568
batch 1088 loss: 0.30766 acc: 0.91439 | v_loss: 0.63334 v_acc: 0.87044 |  iteration: 6061 teacher: 1 stage: sketch lr: 0.000568
batch 1089 loss: 0.30672 acc: 0.90495 | v_loss: 0.64841 v_acc: 0.86751 |  iteration: 6062 teacher: 0 stage: sketch lr: 0.000568
batch 1090 loss: 0.26958 acc: 0.92057 | v_loss: 0.54217 v_acc: 0.89290 |  iteration: 6063 teacher: 0 stage: sketch lr: 0.000568
batch 1091 loss: 0.23284 acc: 0.93132 | v_loss: 0.46896 v_acc: 0.89290 |  iteration: 6064 teacher: 0 stage: sketch lr: 0.000568
batch 1092 loss: 0.25209 acc: 0.92741 | v_loss: 0.58336 v_acc: 0.88672 |  iteration: 6065 teacher: 1 stage: sketch lr: 0.000567
batch 1093 loss: 0.31556 acc: 0.91211 | v_loss: 0.53235 v_acc: 0.89128 |  iteration: 6066 teacher: 1 sta

batch 1151 loss: 0.27341 acc: 0.91927 | v_loss: 0.39056 v_acc: 0.90332 |  iteration: 6124 teacher: 1 stage: sketch lr: 0.000565
batch 1152 loss: 0.25027 acc: 0.91829 | v_loss: 0.30098 v_acc: 0.92122 |  iteration: 6125 teacher: 0 stage: sketch lr: 0.000565
batch 1153 loss: 0.24058 acc: 0.92676 | v_loss: 0.53639 v_acc: 0.88118 |  iteration: 6126 teacher: 1 stage: sketch lr: 0.000565
batch 1154 loss: 0.34688 acc: 0.90983 | v_loss: 0.54728 v_acc: 0.89062 |  iteration: 6127 teacher: 1 stage: sketch lr: 0.000565
batch 1155 loss: 0.33954 acc: 0.90625 | v_loss: 0.65088 v_acc: 0.86882 |  iteration: 6128 teacher: 0 stage: sketch lr: 0.000565
batch 1156 loss: 0.24254 acc: 0.92806 | v_loss: 0.41996 v_acc: 0.90332 |  iteration: 6129 teacher: 0 stage: sketch lr: 0.000565
batch 1157 loss: 0.21540 acc: 0.93555 | v_loss: 0.49833 v_acc: 0.87695 |  iteration: 6130 teacher: 0 stage: sketch lr: 0.000564
batch 1158 loss: 0.29720 acc: 0.91732 | v_loss: 0.63047 v_acc: 0.86882 |  iteration: 6131 teacher: 1 sta

batch 1216 loss: 0.23363 acc: 0.93197 | v_loss: 0.51960 v_acc: 0.89323 |  iteration: 6189 teacher: 0 stage: sketch lr: 0.000562
batch 1217 loss: 0.24455 acc: 0.93001 | v_loss: 0.56705 v_acc: 0.87533 |  iteration: 6190 teacher: 1 stage: sketch lr: 0.000562
batch 1218 loss: 0.20644 acc: 0.93359 | v_loss: 0.67401 v_acc: 0.85091 |  iteration: 6191 teacher: 1 stage: sketch lr: 0.000562
batch 1219 loss: 0.27971 acc: 0.91699 | v_loss: 0.47292 v_acc: 0.89616 |  iteration: 6192 teacher: 1 stage: sketch lr: 0.000562
batch 1220 loss: 0.27257 acc: 0.92350 | v_loss: 0.48918 v_acc: 0.87598 |  iteration: 6193 teacher: 1 stage: sketch lr: 0.000562
batch 1221 loss: 0.27626 acc: 0.92188 | v_loss: 0.52170 v_acc: 0.87305 |  iteration: 6194 teacher: 0 stage: sketch lr: 0.000562
batch 1222 loss: 0.42968 acc: 0.88118 | v_loss: 0.72909 v_acc: 0.85286 |  iteration: 6195 teacher: 1 stage: sketch lr: 0.000561
batch 1223 loss: 0.29359 acc: 0.91667 | v_loss: 0.52178 v_acc: 0.88867 |  iteration: 6196 teacher: 1 sta

batch 37 loss: 0.29579 acc: 0.91374 | v_loss: 0.53537 v_acc: 0.87044 |  iteration: 6253 teacher: 0 stage: sketch lr: 0.000559
batch 38 loss: 0.21346 acc: 0.93620 | v_loss: 0.57139 v_acc: 0.86719 |  iteration: 6254 teacher: 1 stage: sketch lr: 0.000559
batch 39 loss: 0.26551 acc: 0.92773 | v_loss: 0.72605 v_acc: 0.85059 |  iteration: 6255 teacher: 1 stage: sketch lr: 0.000559
batch 40 loss: 0.28815 acc: 0.91309 | v_loss: 0.55959 v_acc: 0.87956 |  iteration: 6256 teacher: 0 stage: sketch lr: 0.000559
batch 41 loss: 0.33402 acc: 0.89844 | v_loss: 0.53129 v_acc: 0.88639 |  iteration: 6257 teacher: 1 stage: sketch lr: 0.000559
batch 42 loss: 0.29147 acc: 0.91211 | v_loss: 0.63558 v_acc: 0.87240 |  iteration: 6258 teacher: 0 stage: sketch lr: 0.000559
batch 43 loss: 0.35472 acc: 0.90169 | v_loss: 0.60105 v_acc: 0.85417 |  iteration: 6259 teacher: 1 stage: sketch lr: 0.000559
batch 44 loss: 0.22380 acc: 0.93229 | v_loss: 0.45864 v_acc: 0.89323 |  iteration: 6260 teacher: 0 stage: sketch lr: 0

batch 103 loss: 0.27738 acc: 0.92415 | v_loss: 0.52604 v_acc: 0.88965 |  iteration: 6319 teacher: 1 stage: sketch lr: 0.000556
batch 104 loss: 0.19247 acc: 0.94434 | v_loss: 0.45754 v_acc: 0.89844 |  iteration: 6320 teacher: 1 stage: sketch lr: 0.000556
batch 105 loss: 0.32079 acc: 0.91439 | v_loss: 1.11894 v_acc: 0.80697 |  iteration: 6321 teacher: 1 stage: sketch lr: 0.000556
batch 106 loss: 0.39628 acc: 0.89453 | v_loss: 0.38842 v_acc: 0.91602 |  iteration: 6322 teacher: 1 stage: sketch lr: 0.000556
batch 107 loss: 0.23218 acc: 0.93359 | v_loss: 0.60501 v_acc: 0.87012 |  iteration: 6323 teacher: 0 stage: sketch lr: 0.000556
batch 108 loss: 0.36173 acc: 0.89551 | v_loss: 0.54608 v_acc: 0.88932 |  iteration: 6324 teacher: 1 stage: sketch lr: 0.000556
batch 109 loss: 0.25730 acc: 0.93001 | v_loss: 0.42528 v_acc: 0.90234 |  iteration: 6325 teacher: 0 stage: sketch lr: 0.000556
batch 110 loss: 0.29537 acc: 0.91634 | v_loss: 0.52840 v_acc: 0.89486 |  iteration: 6326 teacher: 0 stage: sket

batch 168 loss: 0.23753 acc: 0.92904 | v_loss: 0.53132 v_acc: 0.87760 |  iteration: 6384 teacher: 1 stage: sketch lr: 0.000553
batch 169 loss: 0.34028 acc: 0.89583 | v_loss: 0.75671 v_acc: 0.85124 |  iteration: 6385 teacher: 0 stage: sketch lr: 0.000553
batch 170 loss: 0.28223 acc: 0.91243 | v_loss: 0.51863 v_acc: 0.89128 |  iteration: 6386 teacher: 0 stage: sketch lr: 0.000553
batch 171 loss: 0.22421 acc: 0.93099 | v_loss: 0.51067 v_acc: 0.88737 |  iteration: 6387 teacher: 0 stage: sketch lr: 0.000553
batch 172 loss: 0.22808 acc: 0.93457 | v_loss: 0.53374 v_acc: 0.87760 |  iteration: 6388 teacher: 1 stage: sketch lr: 0.000553
batch 173 loss: 0.29130 acc: 0.91634 | v_loss: 0.41852 v_acc: 0.91211 |  iteration: 6389 teacher: 1 stage: sketch lr: 0.000553
batch 174 loss: 0.22442 acc: 0.93392 | v_loss: 0.48878 v_acc: 0.90462 |  iteration: 6390 teacher: 1 stage: sketch lr: 0.000553
batch 175 loss: 0.29350 acc: 0.92090 | v_loss: 0.61856 v_acc: 0.86361 |  iteration: 6391 teacher: 1 stage: sket

batch 233 loss: 0.28489 acc: 0.91862 | v_loss: 0.52240 v_acc: 0.88216 |  iteration: 6449 teacher: 1 stage: sketch lr: 0.000550
batch 234 loss: 0.27890 acc: 0.92546 | v_loss: 0.61612 v_acc: 0.84505 |  iteration: 6450 teacher: 1 stage: sketch lr: 0.000550
batch 235 loss: 0.20110 acc: 0.93424 | v_loss: 0.54452 v_acc: 0.88835 |  iteration: 6451 teacher: 0 stage: sketch lr: 0.000550
batch 236 loss: 0.33893 acc: 0.90690 | v_loss: 0.59311 v_acc: 0.87891 |  iteration: 6452 teacher: 0 stage: sketch lr: 0.000550
batch 237 loss: 0.27791 acc: 0.92383 | v_loss: 0.35899 v_acc: 0.90983 |  iteration: 6453 teacher: 1 stage: sketch lr: 0.000550
batch 238 loss: 0.27235 acc: 0.92415 | v_loss: 0.52703 v_acc: 0.88900 |  iteration: 6454 teacher: 1 stage: sketch lr: 0.000550
batch 239 loss: 0.21834 acc: 0.93685 | v_loss: 0.50914 v_acc: 0.88184 |  iteration: 6455 teacher: 1 stage: sketch lr: 0.000550
batch 240 loss: 0.26609 acc: 0.91341 | v_loss: 0.82385 v_acc: 0.83333 |  iteration: 6456 teacher: 0 stage: sket

batch 298 loss: 0.24164 acc: 0.92676 | v_loss: 0.61787 v_acc: 0.87858 |  iteration: 6514 teacher: 1 stage: sketch lr: 0.000548
batch 299 loss: 0.27406 acc: 0.91504 | v_loss: 0.67250 v_acc: 0.87109 |  iteration: 6515 teacher: 0 stage: sketch lr: 0.000548
batch 300 loss: 0.29931 acc: 0.91862 | v_loss: 0.42011 v_acc: 0.90527 |  iteration: 6516 teacher: 0 stage: sketch lr: 0.000547
batch 301 loss: 0.20566 acc: 0.93978 | v_loss: 0.66826 v_acc: 0.85710 |  iteration: 6517 teacher: 1 stage: sketch lr: 0.000547
batch 302 loss: 0.26173 acc: 0.91960 | v_loss: 0.43419 v_acc: 0.89453 |  iteration: 6518 teacher: 0 stage: sketch lr: 0.000547
batch 303 loss: 0.26295 acc: 0.92708 | v_loss: 0.46867 v_acc: 0.88802 |  iteration: 6519 teacher: 0 stage: sketch lr: 0.000547
batch 304 loss: 0.31773 acc: 0.90527 | v_loss: 0.43356 v_acc: 0.89290 |  iteration: 6520 teacher: 0 stage: sketch lr: 0.000547
batch 305 loss: 0.29061 acc: 0.91374 | v_loss: 0.73376 v_acc: 0.85156 |  iteration: 6521 teacher: 0 stage: sket

batch 363 loss: 0.25433 acc: 0.92839 | v_loss: 0.45538 v_acc: 0.89193 |  iteration: 6579 teacher: 1 stage: sketch lr: 0.000545
batch 364 loss: 0.27493 acc: 0.91699 | v_loss: 0.55922 v_acc: 0.88737 |  iteration: 6580 teacher: 0 stage: sketch lr: 0.000545
batch 365 loss: 0.21197 acc: 0.93717 | v_loss: 0.52396 v_acc: 0.89583 |  iteration: 6581 teacher: 1 stage: sketch lr: 0.000545
batch 366 loss: 0.28337 acc: 0.92090 | v_loss: 0.51060 v_acc: 0.88737 |  iteration: 6582 teacher: 1 stage: sketch lr: 0.000545
batch 367 loss: 0.38707 acc: 0.89974 | v_loss: 0.50507 v_acc: 0.90007 |  iteration: 6583 teacher: 1 stage: sketch lr: 0.000545
batch 368 loss: 0.26805 acc: 0.92220 | v_loss: 0.49832 v_acc: 0.88835 |  iteration: 6584 teacher: 1 stage: sketch lr: 0.000545
batch 369 loss: 0.22928 acc: 0.92969 | v_loss: 0.36736 v_acc: 0.91732 |  iteration: 6585 teacher: 0 stage: sketch lr: 0.000545
batch 370 loss: 0.25534 acc: 0.92741 | v_loss: 0.65514 v_acc: 0.85872 |  iteration: 6586 teacher: 1 stage: sket

batch 428 loss: 0.23877 acc: 0.93099 | v_loss: 0.41972 v_acc: 0.89583 |  iteration: 6644 teacher: 0 stage: sketch lr: 0.000542
batch 429 loss: 0.23523 acc: 0.93066 | v_loss: 0.46866 v_acc: 0.87695 |  iteration: 6645 teacher: 0 stage: sketch lr: 0.000542
batch 430 loss: 0.26821 acc: 0.92025 | v_loss: 0.58252 v_acc: 0.87109 |  iteration: 6646 teacher: 0 stage: sketch lr: 0.000542
batch 431 loss: 0.28500 acc: 0.91667 | v_loss: 0.65837 v_acc: 0.85059 |  iteration: 6647 teacher: 0 stage: sketch lr: 0.000542
batch 432 loss: 0.23557 acc: 0.93620 | v_loss: 0.47763 v_acc: 0.89779 |  iteration: 6648 teacher: 1 stage: sketch lr: 0.000542
batch 433 loss: 0.24182 acc: 0.92318 | v_loss: 0.38747 v_acc: 0.90267 |  iteration: 6649 teacher: 1 stage: sketch lr: 0.000542
batch 434 loss: 0.27530 acc: 0.91862 | v_loss: 0.34198 v_acc: 0.91569 |  iteration: 6650 teacher: 0 stage: sketch lr: 0.000542
batch 435 loss: 0.29625 acc: 0.91797 | v_loss: 0.43008 v_acc: 0.90104 |  iteration: 6651 teacher: 0 stage: sket

batch 493 loss: 0.29684 acc: 0.91797 | v_loss: 0.54926 v_acc: 0.86719 |  iteration: 6709 teacher: 0 stage: sketch lr: 0.000540
batch 494 loss: 0.31971 acc: 0.90234 | v_loss: 0.73820 v_acc: 0.84049 |  iteration: 6710 teacher: 0 stage: sketch lr: 0.000540
batch 495 loss: 0.30180 acc: 0.91309 | v_loss: 0.53291 v_acc: 0.87793 |  iteration: 6711 teacher: 1 stage: sketch lr: 0.000539
batch 496 loss: 0.30380 acc: 0.90495 | v_loss: 0.59587 v_acc: 0.87174 |  iteration: 6712 teacher: 0 stage: sketch lr: 0.000539
batch 497 loss: 0.26705 acc: 0.92155 | v_loss: 0.65517 v_acc: 0.86979 |  iteration: 6713 teacher: 0 stage: sketch lr: 0.000539
batch 498 loss: 0.28464 acc: 0.91569 | v_loss: 0.55210 v_acc: 0.86198 |  iteration: 6714 teacher: 1 stage: sketch lr: 0.000539
batch 499 loss: 0.22010 acc: 0.93262 | v_loss: 0.47238 v_acc: 0.89388 |  iteration: 6715 teacher: 1 stage: sketch lr: 0.000539
batch 500 loss: 0.26845 acc: 0.91406 | v_loss: 0.73596 v_acc: 0.84115 |  iteration: 6716 teacher: 0 stage: sket

batch 558 loss: 0.28656 acc: 0.91536 | v_loss: 0.51945 v_acc: 0.89193 |  iteration: 6774 teacher: 0 stage: sketch lr: 0.000537
batch 559 loss: 0.22665 acc: 0.92806 | v_loss: 0.44523 v_acc: 0.89941 |  iteration: 6775 teacher: 1 stage: sketch lr: 0.000537
batch 560 loss: 0.30113 acc: 0.91374 | v_loss: 1.12268 v_acc: 0.80273 |  iteration: 6776 teacher: 0 stage: sketch lr: 0.000537
batch 561 loss: 0.22422 acc: 0.93880 | v_loss: 0.39210 v_acc: 0.91243 |  iteration: 6777 teacher: 1 stage: sketch lr: 0.000537
batch 562 loss: 0.20979 acc: 0.93620 | v_loss: 0.58210 v_acc: 0.87728 |  iteration: 6778 teacher: 0 stage: sketch lr: 0.000537
batch 563 loss: 0.34767 acc: 0.90039 | v_loss: 0.51631 v_acc: 0.89421 |  iteration: 6779 teacher: 0 stage: sketch lr: 0.000537
batch 564 loss: 0.38920 acc: 0.89290 | v_loss: 0.39718 v_acc: 0.91048 |  iteration: 6780 teacher: 0 stage: sketch lr: 0.000537
batch 565 loss: 0.33877 acc: 0.90918 | v_loss: 0.47115 v_acc: 0.89486 |  iteration: 6781 teacher: 0 stage: sket

batch 623 loss: 0.32572 acc: 0.90592 | v_loss: 0.56216 v_acc: 0.87891 |  iteration: 6839 teacher: 1 stage: sketch lr: 0.000534
batch 624 loss: 0.36694 acc: 0.89876 | v_loss: 0.75410 v_acc: 0.83887 |  iteration: 6840 teacher: 1 stage: sketch lr: 0.000534
batch 625 loss: 0.25363 acc: 0.92578 | v_loss: 0.53702 v_acc: 0.88672 |  iteration: 6841 teacher: 0 stage: sketch lr: 0.000534
batch 626 loss: 0.31698 acc: 0.90788 | v_loss: 0.49526 v_acc: 0.88672 |  iteration: 6842 teacher: 1 stage: sketch lr: 0.000534
batch 627 loss: 0.27116 acc: 0.92318 | v_loss: 0.48727 v_acc: 0.88249 |  iteration: 6843 teacher: 1 stage: sketch lr: 0.000534
batch 628 loss: 0.37338 acc: 0.88835 | v_loss: 0.42029 v_acc: 0.89909 |  iteration: 6844 teacher: 1 stage: sketch lr: 0.000534
batch 629 loss: 0.25479 acc: 0.92155 | v_loss: 0.43210 v_acc: 0.90365 |  iteration: 6845 teacher: 0 stage: sketch lr: 0.000534
batch 630 loss: 0.25323 acc: 0.92448 | v_loss: 0.58193 v_acc: 0.86458 |  iteration: 6846 teacher: 0 stage: sket

batch 688 loss: 0.48252 acc: 0.87240 | v_loss: 0.55227 v_acc: 0.88053 |  iteration: 6904 teacher: 1 stage: sketch lr: 0.000532
batch 689 loss: 0.30122 acc: 0.91602 | v_loss: 0.65335 v_acc: 0.84635 |  iteration: 6905 teacher: 1 stage: sketch lr: 0.000532
batch 690 loss: 0.29485 acc: 0.91309 | v_loss: 0.54369 v_acc: 0.88477 |  iteration: 6906 teacher: 0 stage: sketch lr: 0.000532
batch 691 loss: 0.41309 acc: 0.89746 | v_loss: 0.62599 v_acc: 0.86589 |  iteration: 6907 teacher: 1 stage: sketch lr: 0.000532
batch 692 loss: 0.29809 acc: 0.90853 | v_loss: 0.35973 v_acc: 0.90690 |  iteration: 6908 teacher: 1 stage: sketch lr: 0.000532
batch 693 loss: 0.30898 acc: 0.91406 | v_loss: 0.54488 v_acc: 0.88249 |  iteration: 6909 teacher: 1 stage: sketch lr: 0.000532
batch 694 loss: 0.29201 acc: 0.91634 | v_loss: 0.53810 v_acc: 0.87142 |  iteration: 6910 teacher: 1 stage: sketch lr: 0.000532
batch 695 loss: 0.35040 acc: 0.90723 | v_loss: 0.84093 v_acc: 0.82910 |  iteration: 6911 teacher: 0 stage: sket

batch 753 loss: 0.31680 acc: 0.90495 | v_loss: 0.60473 v_acc: 0.86328 |  iteration: 6969 teacher: 1 stage: sketch lr: 0.000529
batch 754 loss: 0.39609 acc: 0.87923 | v_loss: 0.68979 v_acc: 0.84473 |  iteration: 6970 teacher: 0 stage: sketch lr: 0.000529
batch 755 loss: 0.29289 acc: 0.91243 | v_loss: 0.45852 v_acc: 0.88314 |  iteration: 6971 teacher: 0 stage: sketch lr: 0.000529
batch 756 loss: 0.28964 acc: 0.91048 | v_loss: 0.71648 v_acc: 0.84147 |  iteration: 6972 teacher: 1 stage: sketch lr: 0.000529
batch 757 loss: 0.33355 acc: 0.89941 | v_loss: 0.47327 v_acc: 0.88281 |  iteration: 6973 teacher: 0 stage: sketch lr: 0.000529
batch 758 loss: 0.40905 acc: 0.88770 | v_loss: 0.49154 v_acc: 0.87663 |  iteration: 6974 teacher: 0 stage: sketch lr: 0.000529
batch 759 loss: 0.29235 acc: 0.90723 | v_loss: 0.49934 v_acc: 0.87272 |  iteration: 6975 teacher: 1 stage: sketch lr: 0.000529
batch 760 loss: 0.36156 acc: 0.89421 | v_loss: 0.78843 v_acc: 0.82227 |  iteration: 6976 teacher: 0 stage: sket

batch 818 loss: 1.69432 acc: 0.69108 | v_loss: 1.63590 v_acc: 0.70410 |  iteration: 7034 teacher: 0 stage: sketch lr: 0.000527
batch 819 loss: 1.65202 acc: 0.69434 | v_loss: 1.55727 v_acc: 0.69694 |  iteration: 7035 teacher: 0 stage: sketch lr: 0.000527
batch 820 loss: 1.60373 acc: 0.70345 | v_loss: 1.57437 v_acc: 0.71061 |  iteration: 7036 teacher: 0 stage: sketch lr: 0.000527
batch 821 loss: 1.59079 acc: 0.69954 | v_loss: 1.62780 v_acc: 0.70443 |  iteration: 7037 teacher: 0 stage: sketch lr: 0.000527
batch 822 loss: 1.60013 acc: 0.70671 | v_loss: 1.43510 v_acc: 0.71875 |  iteration: 7038 teacher: 0 stage: sketch lr: 0.000527
batch 823 loss: 1.62080 acc: 0.69564 | v_loss: 1.48582 v_acc: 0.72493 |  iteration: 7039 teacher: 1 stage: sketch lr: 0.000527
batch 824 loss: 1.57758 acc: 0.70573 | v_loss: 1.43753 v_acc: 0.70671 |  iteration: 7040 teacher: 0 stage: sketch lr: 0.000527
batch 825 loss: 1.57556 acc: 0.70020 | v_loss: 1.50192 v_acc: 0.70508 |  iteration: 7041 teacher: 1 stage: sket

batch 883 loss: 1.57366 acc: 0.68880 | v_loss: 1.41770 v_acc: 0.70671 |  iteration: 7099 teacher: 1 stage: sketch lr: 0.000525
batch 884 loss: 1.57643 acc: 0.70540 | v_loss: 1.67758 v_acc: 0.68848 |  iteration: 7100 teacher: 1 stage: sketch lr: 0.000524
batch 885 loss: 1.51877 acc: 0.70801 | v_loss: 1.80399 v_acc: 0.68913 |  iteration: 7101 teacher: 0 stage: sketch lr: 0.000524
batch 886 loss: 1.56069 acc: 0.69564 | v_loss: 1.66206 v_acc: 0.69531 |  iteration: 7102 teacher: 0 stage: sketch lr: 0.000524
batch 887 loss: 1.54330 acc: 0.70020 | v_loss: 1.46024 v_acc: 0.72298 |  iteration: 7103 teacher: 1 stage: sketch lr: 0.000524
batch 888 loss: 1.47523 acc: 0.70964 | v_loss: 1.52133 v_acc: 0.70573 |  iteration: 7104 teacher: 0 stage: sketch lr: 0.000524
batch 889 loss: 1.54890 acc: 0.70280 | v_loss: 1.38328 v_acc: 0.71842 |  iteration: 7105 teacher: 0 stage: sketch lr: 0.000524
batch 890 loss: 1.68520 acc: 0.69499 | v_loss: 1.55894 v_acc: 0.70085 |  iteration: 7106 teacher: 0 stage: sket

batch 948 loss: 1.49508 acc: 0.70475 | v_loss: 1.63735 v_acc: 0.68978 |  iteration: 7164 teacher: 1 stage: sketch lr: 0.000522
batch 949 loss: 1.51489 acc: 0.70475 | v_loss: 1.50556 v_acc: 0.69922 |  iteration: 7165 teacher: 0 stage: sketch lr: 0.000522
batch 950 loss: 1.54485 acc: 0.70443 | v_loss: 1.44186 v_acc: 0.70540 |  iteration: 7166 teacher: 1 stage: sketch lr: 0.000522
batch 951 loss: 1.56165 acc: 0.69792 | v_loss: 1.47124 v_acc: 0.70280 |  iteration: 7167 teacher: 1 stage: sketch lr: 0.000522
batch 952 loss: 1.58349 acc: 0.69434 | v_loss: 1.44371 v_acc: 0.71777 |  iteration: 7168 teacher: 1 stage: sketch lr: 0.000522
batch 953 loss: 1.55550 acc: 0.69531 | v_loss: 1.64023 v_acc: 0.69303 |  iteration: 7169 teacher: 1 stage: sketch lr: 0.000522
batch 954 loss: 1.53875 acc: 0.69759 | v_loss: 1.47659 v_acc: 0.70247 |  iteration: 7170 teacher: 0 stage: sketch lr: 0.000522
batch 955 loss: 1.51735 acc: 0.70150 | v_loss: 1.44876 v_acc: 0.71517 |  iteration: 7171 teacher: 1 stage: sket

batch 1013 loss: 1.50644 acc: 0.70801 | v_loss: 1.36435 v_acc: 0.73405 |  iteration: 7229 teacher: 0 stage: sketch lr: 0.000520
batch 1014 loss: 1.66329 acc: 0.69727 | v_loss: 1.38524 v_acc: 0.71647 |  iteration: 7230 teacher: 1 stage: sketch lr: 0.000520
batch 1015 loss: 1.55135 acc: 0.70020 | v_loss: 1.47768 v_acc: 0.73438 |  iteration: 7231 teacher: 0 stage: sketch lr: 0.000520
batch 1016 loss: 1.52400 acc: 0.69466 | v_loss: 1.37468 v_acc: 0.72331 |  iteration: 7232 teacher: 0 stage: sketch lr: 0.000520
batch 1017 loss: 1.49992 acc: 0.70345 | v_loss: 1.40217 v_acc: 0.71061 |  iteration: 7233 teacher: 0 stage: sketch lr: 0.000520
batch 1018 loss: 1.57547 acc: 0.69661 | v_loss: 1.46111 v_acc: 0.70931 |  iteration: 7234 teacher: 1 stage: sketch lr: 0.000520
batch 1019 loss: 1.41557 acc: 0.71159 | v_loss: 1.51118 v_acc: 0.72201 |  iteration: 7235 teacher: 1 stage: sketch lr: 0.000520
batch 1020 loss: 1.44002 acc: 0.71680 | v_loss: 1.56015 v_acc: 0.70312 |  iteration: 7236 teacher: 0 sta

batch 1078 loss: 1.50010 acc: 0.70410 | v_loss: 1.39398 v_acc: 0.71712 |  iteration: 7294 teacher: 0 stage: sketch lr: 0.000517
batch 1079 loss: 1.45145 acc: 0.69889 | v_loss: 1.50280 v_acc: 0.69043 |  iteration: 7295 teacher: 0 stage: sketch lr: 0.000517
batch 1080 loss: 1.59308 acc: 0.69010 | v_loss: 1.43703 v_acc: 0.70833 |  iteration: 7296 teacher: 0 stage: sketch lr: 0.000517
batch 1081 loss: 1.50208 acc: 0.70508 | v_loss: 1.38131 v_acc: 0.71810 |  iteration: 7297 teacher: 0 stage: sketch lr: 0.000517
batch 1082 loss: 1.47408 acc: 0.70638 | v_loss: 1.37783 v_acc: 0.71452 |  iteration: 7298 teacher: 1 stage: sketch lr: 0.000517
batch 1083 loss: 1.46712 acc: 0.70671 | v_loss: 1.54741 v_acc: 0.70605 |  iteration: 7299 teacher: 0 stage: sketch lr: 0.000517
batch 1084 loss: 1.56689 acc: 0.70540 | v_loss: 1.40215 v_acc: 0.73145 |  iteration: 7300 teacher: 0 stage: sketch lr: 0.000517
batch 1085 loss: 1.55231 acc: 0.70345 | v_loss: 1.60978 v_acc: 0.71549 |  iteration: 7301 teacher: 0 sta

batch 1143 loss: 1.55925 acc: 0.70150 | v_loss: 1.30432 v_acc: 0.72298 |  iteration: 7359 teacher: 1 stage: sketch lr: 0.000515
batch 1144 loss: 1.43954 acc: 0.71419 | v_loss: 1.47147 v_acc: 0.71940 |  iteration: 7360 teacher: 1 stage: sketch lr: 0.000515
batch 1145 loss: 1.51607 acc: 0.69596 | v_loss: 1.45831 v_acc: 0.70475 |  iteration: 7361 teacher: 1 stage: sketch lr: 0.000515
batch 1146 loss: 1.55800 acc: 0.68815 | v_loss: 1.47154 v_acc: 0.70736 |  iteration: 7362 teacher: 0 stage: sketch lr: 0.000515
batch 1147 loss: 1.54044 acc: 0.69987 | v_loss: 1.28100 v_acc: 0.72493 |  iteration: 7363 teacher: 1 stage: sketch lr: 0.000515
batch 1148 loss: 1.40147 acc: 0.70573 | v_loss: 1.46959 v_acc: 0.72949 |  iteration: 7364 teacher: 1 stage: sketch lr: 0.000515
batch 1149 loss: 1.48894 acc: 0.68978 | v_loss: 1.59418 v_acc: 0.69759 |  iteration: 7365 teacher: 0 stage: sketch lr: 0.000515
batch 1150 loss: 1.58075 acc: 0.68652 | v_loss: 1.49630 v_acc: 0.71777 |  iteration: 7366 teacher: 0 sta

batch 1208 loss: 1.45135 acc: 0.70312 | v_loss: 1.39041 v_acc: 0.71191 |  iteration: 7424 teacher: 1 stage: sketch lr: 0.000513
batch 1209 loss: 1.54133 acc: 0.69564 | v_loss: 1.40424 v_acc: 0.69141 |  iteration: 7425 teacher: 1 stage: sketch lr: 0.000513
batch 1210 loss: 1.45264 acc: 0.69857 | v_loss: 1.33571 v_acc: 0.70898 |  iteration: 7426 teacher: 1 stage: sketch lr: 0.000513
batch 1211 loss: 1.38313 acc: 0.70312 | v_loss: 1.41211 v_acc: 0.69531 |  iteration: 7427 teacher: 1 stage: sketch lr: 0.000513
batch 1212 loss: 1.44332 acc: 0.69857 | v_loss: 1.52075 v_acc: 0.71777 |  iteration: 7428 teacher: 0 stage: sketch lr: 0.000513
batch 1213 loss: 1.48572 acc: 0.69889 | v_loss: 1.35819 v_acc: 0.72656 |  iteration: 7429 teacher: 0 stage: sketch lr: 0.000513
batch 1214 loss: 1.48000 acc: 0.70605 | v_loss: 1.49235 v_acc: 0.70280 |  iteration: 7430 teacher: 1 stage: sketch lr: 0.000513
batch 1215 loss: 1.56217 acc: 0.69792 | v_loss: 1.38899 v_acc: 0.70085 |  iteration: 7431 teacher: 0 sta

batch 29 loss: 1.49200 acc: 0.70573 | v_loss: 1.49221 v_acc: 0.70768 |  iteration: 7488 teacher: 1 stage: sketch lr: 0.000511
batch 30 loss: 1.40587 acc: 0.70671 | v_loss: 1.37996 v_acc: 0.72331 |  iteration: 7489 teacher: 0 stage: sketch lr: 0.000511
batch 31 loss: 1.59838 acc: 0.68945 | v_loss: 1.46582 v_acc: 0.70443 |  iteration: 7490 teacher: 0 stage: sketch lr: 0.000511
batch 32 loss: 1.50387 acc: 0.70443 | v_loss: 1.43497 v_acc: 0.69922 |  iteration: 7491 teacher: 0 stage: sketch lr: 0.000511
batch 33 loss: 1.61559 acc: 0.68848 | v_loss: 1.38847 v_acc: 0.70703 |  iteration: 7492 teacher: 1 stage: sketch lr: 0.000511
batch 34 loss: 1.47187 acc: 0.69661 | v_loss: 1.55936 v_acc: 0.69336 |  iteration: 7493 teacher: 0 stage: sketch lr: 0.000511
batch 35 loss: 1.45010 acc: 0.69889 | v_loss: 1.39190 v_acc: 0.72461 |  iteration: 7494 teacher: 0 stage: sketch lr: 0.000511
batch 36 loss: 1.45492 acc: 0.70215 | v_loss: 1.61475 v_acc: 0.68750 |  iteration: 7495 teacher: 1 stage: sketch lr: 0

batch 95 loss: 1.48126 acc: 0.69206 | v_loss: 1.37291 v_acc: 0.72428 |  iteration: 7554 teacher: 0 stage: sketch lr: 0.000508
batch 96 loss: 1.46078 acc: 0.70898 | v_loss: 1.25301 v_acc: 0.70671 |  iteration: 7555 teacher: 1 stage: sketch lr: 0.000508
batch 97 loss: 1.49888 acc: 0.70052 | v_loss: 1.39442 v_acc: 0.70150 |  iteration: 7556 teacher: 0 stage: sketch lr: 0.000508
batch 98 loss: 1.51013 acc: 0.69564 | v_loss: 1.52498 v_acc: 0.69987 |  iteration: 7557 teacher: 0 stage: sketch lr: 0.000508
batch 99 loss: 1.61161 acc: 0.68880 | v_loss: 1.39506 v_acc: 0.70475 |  iteration: 7558 teacher: 1 stage: sketch lr: 0.000508
batch 100 loss: 1.59673 acc: 0.69238 | v_loss: 1.40423 v_acc: 0.69401 |  iteration: 7559 teacher: 0 stage: sketch lr: 0.000508
batch 101 loss: 1.36622 acc: 0.71322 | v_loss: 1.31884 v_acc: 0.70508 |  iteration: 7560 teacher: 0 stage: sketch lr: 0.000508
batch 102 loss: 1.40405 acc: 0.70182 | v_loss: 1.28536 v_acc: 0.70508 |  iteration: 7561 teacher: 1 stage: sketch lr

batch 160 loss: 1.40791 acc: 0.70703 | v_loss: 1.42499 v_acc: 0.70605 |  iteration: 7619 teacher: 1 stage: sketch lr: 0.000506
batch 161 loss: 1.46420 acc: 0.69141 | v_loss: 1.26708 v_acc: 0.71875 |  iteration: 7620 teacher: 0 stage: sketch lr: 0.000506
batch 162 loss: 1.34345 acc: 0.71549 | v_loss: 1.49279 v_acc: 0.69857 |  iteration: 7621 teacher: 1 stage: sketch lr: 0.000506
batch 163 loss: 1.53393 acc: 0.69889 | v_loss: 1.38763 v_acc: 0.71549 |  iteration: 7622 teacher: 1 stage: sketch lr: 0.000506
batch 164 loss: 1.55735 acc: 0.68848 | v_loss: 1.39619 v_acc: 0.72754 |  iteration: 7623 teacher: 1 stage: sketch lr: 0.000506
batch 165 loss: 1.46990 acc: 0.70280 | v_loss: 1.43083 v_acc: 0.71842 |  iteration: 7624 teacher: 0 stage: sketch lr: 0.000506
batch 166 loss: 1.48882 acc: 0.70996 | v_loss: 1.41142 v_acc: 0.70410 |  iteration: 7625 teacher: 1 stage: sketch lr: 0.000506
batch 167 loss: 1.51757 acc: 0.70475 | v_loss: 1.32679 v_acc: 0.72233 |  iteration: 7626 teacher: 0 stage: sket

batch 225 loss: 1.54286 acc: 0.69271 | v_loss: 1.58676 v_acc: 0.68978 |  iteration: 7684 teacher: 1 stage: sketch lr: 0.000504
batch 226 loss: 1.51786 acc: 0.69824 | v_loss: 1.43900 v_acc: 0.70768 |  iteration: 7685 teacher: 0 stage: sketch lr: 0.000504
batch 227 loss: 1.48902 acc: 0.69759 | v_loss: 1.37623 v_acc: 0.70964 |  iteration: 7686 teacher: 1 stage: sketch lr: 0.000504
batch 228 loss: 1.44211 acc: 0.70898 | v_loss: 1.40406 v_acc: 0.71680 |  iteration: 7687 teacher: 1 stage: sketch lr: 0.000504
batch 229 loss: 1.39162 acc: 0.70247 | v_loss: 1.29960 v_acc: 0.70573 |  iteration: 7688 teacher: 0 stage: sketch lr: 0.000504
batch 230 loss: 1.44336 acc: 0.69824 | v_loss: 1.46903 v_acc: 0.69727 |  iteration: 7689 teacher: 0 stage: sketch lr: 0.000504
batch 231 loss: 1.51435 acc: 0.70280 | v_loss: 1.44903 v_acc: 0.71452 |  iteration: 7690 teacher: 1 stage: sketch lr: 0.000504
batch 232 loss: 1.53452 acc: 0.69792 | v_loss: 1.32928 v_acc: 0.71615 |  iteration: 7691 teacher: 0 stage: sket

batch 290 loss: 1.48478 acc: 0.71061 | v_loss: 1.42918 v_acc: 0.71094 |  iteration: 7749 teacher: 1 stage: sketch lr: 0.000502
batch 291 loss: 1.46578 acc: 0.69922 | v_loss: 1.40119 v_acc: 0.72038 |  iteration: 7750 teacher: 0 stage: sketch lr: 0.000502
batch 292 loss: 1.38655 acc: 0.71029 | v_loss: 1.49812 v_acc: 0.69661 |  iteration: 7751 teacher: 1 stage: sketch lr: 0.000502
batch 293 loss: 1.40195 acc: 0.71224 | v_loss: 1.45202 v_acc: 0.71615 |  iteration: 7752 teacher: 1 stage: sketch lr: 0.000502
batch 294 loss: 1.48438 acc: 0.70280 | v_loss: 1.19527 v_acc: 0.74544 |  iteration: 7753 teacher: 0 stage: sketch lr: 0.000502
batch 295 loss: 1.47987 acc: 0.69954 | v_loss: 1.27983 v_acc: 0.70931 |  iteration: 7754 teacher: 1 stage: sketch lr: 0.000502
batch 296 loss: 1.47968 acc: 0.69336 | v_loss: 1.56194 v_acc: 0.68783 |  iteration: 7755 teacher: 0 stage: sketch lr: 0.000502
batch 297 loss: 1.54223 acc: 0.69303 | v_loss: 1.27987 v_acc: 0.69531 |  iteration: 7756 teacher: 0 stage: sket

batch 355 loss: 1.50752 acc: 0.70280 | v_loss: 1.49282 v_acc: 0.70671 |  iteration: 7814 teacher: 0 stage: sketch lr: 0.000500
batch 356 loss: 1.49759 acc: 0.70443 | v_loss: 1.34599 v_acc: 0.73210 |  iteration: 7815 teacher: 0 stage: sketch lr: 0.000500
batch 357 loss: 1.51692 acc: 0.69303 | v_loss: 1.59445 v_acc: 0.71647 |  iteration: 7816 teacher: 0 stage: sketch lr: 0.000500
batch 358 loss: 1.46372 acc: 0.70085 | v_loss: 1.31322 v_acc: 0.69987 |  iteration: 7817 teacher: 1 stage: sketch lr: 0.000500
batch 359 loss: 1.51547 acc: 0.69857 | v_loss: 1.30955 v_acc: 0.70312 |  iteration: 7818 teacher: 0 stage: sketch lr: 0.000500
batch 360 loss: 1.50742 acc: 0.70280 | v_loss: 1.44737 v_acc: 0.70703 |  iteration: 7819 teacher: 1 stage: sketch lr: 0.000500
batch 361 loss: 1.56187 acc: 0.69401 | v_loss: 1.47589 v_acc: 0.70638 |  iteration: 7820 teacher: 0 stage: sketch lr: 0.000500
batch 362 loss: 1.55785 acc: 0.69238 | v_loss: 1.52942 v_acc: 0.68815 |  iteration: 7821 teacher: 1 stage: sket

batch 420 loss: 1.45427 acc: 0.70247 | v_loss: 1.40389 v_acc: 0.73177 |  iteration: 7879 teacher: 1 stage: sketch lr: 0.000498
batch 421 loss: 1.39054 acc: 0.70443 | v_loss: 1.49135 v_acc: 0.69661 |  iteration: 7880 teacher: 1 stage: sketch lr: 0.000498
batch 422 loss: 1.40396 acc: 0.70540 | v_loss: 1.45700 v_acc: 0.71908 |  iteration: 7881 teacher: 1 stage: sketch lr: 0.000498
batch 423 loss: 1.36770 acc: 0.70703 | v_loss: 1.25966 v_acc: 0.71908 |  iteration: 7882 teacher: 1 stage: sketch lr: 0.000498
batch 424 loss: 1.43023 acc: 0.69564 | v_loss: 1.20032 v_acc: 0.73861 |  iteration: 7883 teacher: 0 stage: sketch lr: 0.000498
batch 425 loss: 1.53284 acc: 0.69824 | v_loss: 1.22673 v_acc: 0.72493 |  iteration: 7884 teacher: 1 stage: sketch lr: 0.000498
batch 426 loss: 1.46371 acc: 0.69499 | v_loss: 1.30576 v_acc: 0.70638 |  iteration: 7885 teacher: 1 stage: sketch lr: 0.000498
batch 427 loss: 1.48114 acc: 0.70020 | v_loss: 1.46154 v_acc: 0.69499 |  iteration: 7886 teacher: 0 stage: sket

batch 485 loss: 1.42550 acc: 0.70052 | v_loss: 1.34771 v_acc: 0.72396 |  iteration: 7944 teacher: 0 stage: sketch lr: 0.000496
batch 486 loss: 1.39133 acc: 0.71094 | v_loss: 1.43746 v_acc: 0.70801 |  iteration: 7945 teacher: 0 stage: sketch lr: 0.000496
batch 487 loss: 1.46865 acc: 0.69987 | v_loss: 1.41034 v_acc: 0.69922 |  iteration: 7946 teacher: 0 stage: sketch lr: 0.000496
batch 488 loss: 1.48903 acc: 0.69792 | v_loss: 1.35013 v_acc: 0.70605 |  iteration: 7947 teacher: 1 stage: sketch lr: 0.000496
batch 489 loss: 1.42411 acc: 0.70410 | v_loss: 1.56812 v_acc: 0.68848 |  iteration: 7948 teacher: 0 stage: sketch lr: 0.000496
batch 490 loss: 1.39367 acc: 0.71029 | v_loss: 1.31196 v_acc: 0.72135 |  iteration: 7949 teacher: 1 stage: sketch lr: 0.000496
batch 491 loss: 1.45351 acc: 0.70671 | v_loss: 1.59790 v_acc: 0.68620 |  iteration: 7950 teacher: 1 stage: sketch lr: 0.000496
batch 492 loss: 1.45898 acc: 0.70150 | v_loss: 1.46766 v_acc: 0.69792 |  iteration: 7951 teacher: 1 stage: sket

batch 550 loss: 1.45019 acc: 0.70020 | v_loss: 1.32859 v_acc: 0.72331 |  iteration: 8009 teacher: 0 stage: sketch lr: 0.000494
batch 551 loss: 1.59036 acc: 0.68717 | v_loss: 1.21283 v_acc: 0.70671 |  iteration: 8010 teacher: 0 stage: sketch lr: 0.000494
batch 552 loss: 1.40970 acc: 0.70508 | v_loss: 1.36658 v_acc: 0.70150 |  iteration: 8011 teacher: 0 stage: sketch lr: 0.000494
batch 553 loss: 1.48281 acc: 0.70182 | v_loss: 1.50010 v_acc: 0.69987 |  iteration: 8012 teacher: 1 stage: sketch lr: 0.000494
batch 554 loss: 1.58722 acc: 0.68099 | v_loss: 1.37411 v_acc: 0.70443 |  iteration: 8013 teacher: 1 stage: sketch lr: 0.000494
batch 555 loss: 1.49868 acc: 0.69987 | v_loss: 1.36724 v_acc: 0.69922 |  iteration: 8014 teacher: 0 stage: sketch lr: 0.000494
batch 556 loss: 1.45066 acc: 0.70605 | v_loss: 1.31961 v_acc: 0.70638 |  iteration: 8015 teacher: 0 stage: sketch lr: 0.000494
batch 557 loss: 1.60133 acc: 0.68913 | v_loss: 1.29260 v_acc: 0.70703 |  iteration: 8016 teacher: 0 stage: sket

batch 615 loss: 1.36744 acc: 0.70671 | v_loss: 1.38530 v_acc: 0.70996 |  iteration: 8074 teacher: 1 stage: sketch lr: 0.000492
batch 616 loss: 1.50825 acc: 0.69954 | v_loss: 1.25825 v_acc: 0.71973 |  iteration: 8075 teacher: 0 stage: sketch lr: 0.000492
batch 617 loss: 1.37965 acc: 0.71354 | v_loss: 1.43177 v_acc: 0.69857 |  iteration: 8076 teacher: 1 stage: sketch lr: 0.000492
batch 618 loss: 1.54085 acc: 0.69759 | v_loss: 1.36653 v_acc: 0.70964 |  iteration: 8077 teacher: 0 stage: sketch lr: 0.000492
batch 619 loss: 1.47973 acc: 0.69076 | v_loss: 1.37420 v_acc: 0.72949 |  iteration: 8078 teacher: 0 stage: sketch lr: 0.000492
batch 620 loss: 1.48172 acc: 0.69987 | v_loss: 1.36891 v_acc: 0.71842 |  iteration: 8079 teacher: 1 stage: sketch lr: 0.000492
batch 621 loss: 1.49933 acc: 0.70085 | v_loss: 1.39846 v_acc: 0.70215 |  iteration: 8080 teacher: 0 stage: sketch lr: 0.000492
batch 622 loss: 1.48435 acc: 0.69401 | v_loss: 1.32031 v_acc: 0.72201 |  iteration: 8081 teacher: 1 stage: sket

batch 680 loss: 1.52570 acc: 0.68848 | v_loss: 1.53156 v_acc: 0.69238 |  iteration: 8139 teacher: 1 stage: sketch lr: 0.000490
batch 681 loss: 1.45371 acc: 0.69499 | v_loss: 1.39268 v_acc: 0.70443 |  iteration: 8140 teacher: 0 stage: sketch lr: 0.000490
batch 682 loss: 1.50544 acc: 0.69466 | v_loss: 1.37881 v_acc: 0.71029 |  iteration: 8141 teacher: 0 stage: sketch lr: 0.000490
batch 683 loss: 1.44847 acc: 0.70085 | v_loss: 1.38897 v_acc: 0.71680 |  iteration: 8142 teacher: 1 stage: sketch lr: 0.000490
batch 684 loss: 1.41288 acc: 0.71159 | v_loss: 1.29201 v_acc: 0.70215 |  iteration: 8143 teacher: 1 stage: sketch lr: 0.000490
batch 685 loss: 1.49028 acc: 0.69434 | v_loss: 1.44540 v_acc: 0.69434 |  iteration: 8144 teacher: 1 stage: sketch lr: 0.000490
batch 686 loss: 1.44720 acc: 0.70573 | v_loss: 1.42721 v_acc: 0.71452 |  iteration: 8145 teacher: 0 stage: sketch lr: 0.000490
batch 687 loss: 1.55353 acc: 0.69206 | v_loss: 1.31434 v_acc: 0.71777 |  iteration: 8146 teacher: 0 stage: sket

batch 745 loss: 1.38088 acc: 0.71322 | v_loss: 1.42512 v_acc: 0.71289 |  iteration: 8204 teacher: 1 stage: sketch lr: 0.000488
batch 746 loss: 1.51810 acc: 0.69661 | v_loss: 1.39629 v_acc: 0.72038 |  iteration: 8205 teacher: 0 stage: sketch lr: 0.000488
batch 747 loss: 1.38494 acc: 0.71159 | v_loss: 1.49856 v_acc: 0.69661 |  iteration: 8206 teacher: 0 stage: sketch lr: 0.000488
batch 748 loss: 1.48335 acc: 0.69499 | v_loss: 1.42091 v_acc: 0.71745 |  iteration: 8207 teacher: 0 stage: sketch lr: 0.000488
batch 749 loss: 1.49941 acc: 0.69922 | v_loss: 1.18726 v_acc: 0.74382 |  iteration: 8208 teacher: 0 stage: sketch lr: 0.000488
batch 750 loss: 1.51544 acc: 0.69564 | v_loss: 1.28159 v_acc: 0.70280 |  iteration: 8209 teacher: 1 stage: sketch lr: 0.000488
batch 751 loss: 1.43156 acc: 0.71029 | v_loss: 1.50612 v_acc: 0.70898 |  iteration: 8210 teacher: 1 stage: sketch lr: 0.000488
batch 752 loss: 1.43814 acc: 0.69824 | v_loss: 1.23606 v_acc: 0.71647 |  iteration: 8211 teacher: 1 stage: sket

batch 810 loss: 1.42791 acc: 0.71549 | v_loss: 1.43259 v_acc: 0.70768 |  iteration: 8269 teacher: 0 stage: sketch lr: 0.000486
batch 811 loss: 1.56987 acc: 0.68392 | v_loss: 1.33441 v_acc: 0.73210 |  iteration: 8270 teacher: 0 stage: sketch lr: 0.000486
batch 812 loss: 1.44098 acc: 0.70280 | v_loss: 1.53827 v_acc: 0.71322 |  iteration: 8271 teacher: 1 stage: sketch lr: 0.000486
batch 813 loss: 1.57443 acc: 0.69108 | v_loss: 1.31165 v_acc: 0.69531 |  iteration: 8272 teacher: 0 stage: sketch lr: 0.000486
batch 814 loss: 1.42270 acc: 0.70312 | v_loss: 1.29942 v_acc: 0.70117 |  iteration: 8273 teacher: 1 stage: sketch lr: 0.000486
batch 815 loss: 1.47159 acc: 0.69206 | v_loss: 1.45297 v_acc: 0.70508 |  iteration: 8274 teacher: 1 stage: sketch lr: 0.000486
batch 816 loss: 1.40948 acc: 0.70280 | v_loss: 1.47231 v_acc: 0.70573 |  iteration: 8275 teacher: 1 stage: sketch lr: 0.000486
batch 817 loss: 1.52205 acc: 0.69010 | v_loss: 1.51779 v_acc: 0.69076 |  iteration: 8276 teacher: 0 stage: sket

batch 875 loss: 1.33654 acc: 0.70312 | v_loss: 1.38928 v_acc: 0.73145 |  iteration: 8334 teacher: 0 stage: sketch lr: 0.000484
batch 876 loss: 1.43705 acc: 0.70508 | v_loss: 1.48994 v_acc: 0.69694 |  iteration: 8335 teacher: 0 stage: sketch lr: 0.000484
batch 877 loss: 1.57217 acc: 0.69043 | v_loss: 1.43410 v_acc: 0.72070 |  iteration: 8336 teacher: 1 stage: sketch lr: 0.000484
batch 878 loss: 1.42763 acc: 0.69596 | v_loss: 1.27446 v_acc: 0.71940 |  iteration: 8337 teacher: 1 stage: sketch lr: 0.000484
batch 879 loss: 1.43291 acc: 0.71322 | v_loss: 1.22699 v_acc: 0.73535 |  iteration: 8338 teacher: 1 stage: sketch lr: 0.000484
batch 880 loss: 1.40663 acc: 0.69889 | v_loss: 1.23258 v_acc: 0.72656 |  iteration: 8339 teacher: 0 stage: sketch lr: 0.000484
batch 881 loss: 1.45203 acc: 0.69499 | v_loss: 1.30800 v_acc: 0.70703 |  iteration: 8340 teacher: 0 stage: sketch lr: 0.000484
batch 882 loss: 1.43298 acc: 0.69434 | v_loss: 1.46349 v_acc: 0.69466 |  iteration: 8341 teacher: 1 stage: sket

batch 940 loss: 1.52654 acc: 0.69857 | v_loss: 1.32673 v_acc: 0.72786 |  iteration: 8399 teacher: 0 stage: sketch lr: 0.000482
batch 941 loss: 1.48691 acc: 0.69531 | v_loss: 1.44657 v_acc: 0.70540 |  iteration: 8400 teacher: 0 stage: sketch lr: 0.000482
batch 942 loss: 1.60557 acc: 0.69010 | v_loss: 1.38277 v_acc: 0.69857 |  iteration: 8401 teacher: 0 stage: sketch lr: 0.000482
batch 943 loss: 1.56631 acc: 0.68685 | v_loss: 1.34374 v_acc: 0.70833 |  iteration: 8402 teacher: 0 stage: sketch lr: 0.000482
batch 944 loss: 1.43738 acc: 0.70801 | v_loss: 1.58255 v_acc: 0.68522 |  iteration: 8403 teacher: 1 stage: sketch lr: 0.000482
batch 945 loss: 1.46334 acc: 0.70312 | v_loss: 1.30383 v_acc: 0.72005 |  iteration: 8404 teacher: 0 stage: sketch lr: 0.000482
batch 946 loss: 1.41478 acc: 0.70280 | v_loss: 1.60683 v_acc: 0.68620 |  iteration: 8405 teacher: 1 stage: sketch lr: 0.000482
batch 947 loss: 1.47839 acc: 0.69759 | v_loss: 1.44924 v_acc: 0.69987 |  iteration: 8406 teacher: 0 stage: sket

batch 1005 loss: 1.51975 acc: 0.68685 | v_loss: 1.34560 v_acc: 0.72493 |  iteration: 8464 teacher: 1 stage: sketch lr: 0.000480
batch 1006 loss: 1.50813 acc: 0.70215 | v_loss: 1.21472 v_acc: 0.70671 |  iteration: 8465 teacher: 1 stage: sketch lr: 0.000480
batch 1007 loss: 1.50061 acc: 0.70247 | v_loss: 1.36200 v_acc: 0.70150 |  iteration: 8466 teacher: 1 stage: sketch lr: 0.000480
batch 1008 loss: 1.55832 acc: 0.69596 | v_loss: 1.49457 v_acc: 0.69987 |  iteration: 8467 teacher: 1 stage: sketch lr: 0.000480
batch 1009 loss: 1.48217 acc: 0.69954 | v_loss: 1.34864 v_acc: 0.70540 |  iteration: 8468 teacher: 0 stage: sketch lr: 0.000480
batch 1010 loss: 1.38739 acc: 0.69629 | v_loss: 1.36146 v_acc: 0.69368 |  iteration: 8469 teacher: 0 stage: sketch lr: 0.000480
batch 1011 loss: 1.51622 acc: 0.69043 | v_loss: 1.29525 v_acc: 0.70638 |  iteration: 8470 teacher: 0 stage: sketch lr: 0.000480
batch 1012 loss: 1.42779 acc: 0.70150 | v_loss: 1.29829 v_acc: 0.70215 |  iteration: 8471 teacher: 1 sta

batch 1070 loss: 1.38390 acc: 0.71126 | v_loss: 1.39417 v_acc: 0.69987 |  iteration: 8529 teacher: 0 stage: sketch lr: 0.000479
batch 1071 loss: 1.39216 acc: 0.71647 | v_loss: 1.23106 v_acc: 0.71940 |  iteration: 8530 teacher: 0 stage: sketch lr: 0.000479
batch 1072 loss: 1.42445 acc: 0.69954 | v_loss: 1.44132 v_acc: 0.70378 |  iteration: 8531 teacher: 0 stage: sketch lr: 0.000478
batch 1073 loss: 1.44304 acc: 0.70410 | v_loss: 1.36946 v_acc: 0.71810 |  iteration: 8532 teacher: 0 stage: sketch lr: 0.000478
batch 1074 loss: 1.61313 acc: 0.69629 | v_loss: 1.37019 v_acc: 0.72754 |  iteration: 8533 teacher: 0 stage: sketch lr: 0.000478
batch 1075 loss: 1.40630 acc: 0.70215 | v_loss: 1.40840 v_acc: 0.71842 |  iteration: 8534 teacher: 1 stage: sketch lr: 0.000478
batch 1076 loss: 1.48636 acc: 0.70638 | v_loss: 1.40257 v_acc: 0.71159 |  iteration: 8535 teacher: 1 stage: sketch lr: 0.000478
batch 1077 loss: 1.47640 acc: 0.70671 | v_loss: 1.29156 v_acc: 0.72461 |  iteration: 8536 teacher: 1 sta

batch 1135 loss: 1.46389 acc: 0.69759 | v_loss: 1.53040 v_acc: 0.69173 |  iteration: 8594 teacher: 0 stage: sketch lr: 0.000477
batch 1136 loss: 1.45515 acc: 0.70605 | v_loss: 1.39543 v_acc: 0.70247 |  iteration: 8595 teacher: 1 stage: sketch lr: 0.000477
batch 1137 loss: 1.47075 acc: 0.70605 | v_loss: 1.36182 v_acc: 0.71126 |  iteration: 8596 teacher: 1 stage: sketch lr: 0.000477
batch 1138 loss: 1.46322 acc: 0.69661 | v_loss: 1.39805 v_acc: 0.71810 |  iteration: 8597 teacher: 1 stage: sketch lr: 0.000477
batch 1139 loss: 1.53608 acc: 0.69922 | v_loss: 1.30725 v_acc: 0.70215 |  iteration: 8598 teacher: 0 stage: sketch lr: 0.000477
batch 1140 loss: 1.41956 acc: 0.70443 | v_loss: 1.44091 v_acc: 0.69434 |  iteration: 8599 teacher: 0 stage: sketch lr: 0.000477
batch 1141 loss: 1.39353 acc: 0.70736 | v_loss: 1.42158 v_acc: 0.71452 |  iteration: 8600 teacher: 0 stage: sketch lr: 0.000477
batch 1142 loss: 1.42459 acc: 0.70671 | v_loss: 1.30158 v_acc: 0.71647 |  iteration: 8601 teacher: 0 sta

batch 1200 loss: 1.35362 acc: 0.71354 | v_loss: 1.41721 v_acc: 0.71224 |  iteration: 8659 teacher: 0 stage: sketch lr: 0.000475
batch 1201 loss: 1.52347 acc: 0.70345 | v_loss: 1.40832 v_acc: 0.72070 |  iteration: 8660 teacher: 0 stage: sketch lr: 0.000475
batch 1202 loss: 1.33151 acc: 0.71484 | v_loss: 1.50327 v_acc: 0.69954 |  iteration: 8661 teacher: 1 stage: sketch lr: 0.000475
batch 1203 loss: 1.54325 acc: 0.69043 | v_loss: 1.45743 v_acc: 0.71615 |  iteration: 8662 teacher: 1 stage: sketch lr: 0.000475
batch 1204 loss: 1.46807 acc: 0.69759 | v_loss: 1.19139 v_acc: 0.74512 |  iteration: 8663 teacher: 1 stage: sketch lr: 0.000475
batch 1205 loss: 1.40746 acc: 0.70280 | v_loss: 1.26812 v_acc: 0.70964 |  iteration: 8664 teacher: 1 stage: sketch lr: 0.000475
batch 1206 loss: 1.39463 acc: 0.70736 | v_loss: 1.56950 v_acc: 0.69792 |  iteration: 8665 teacher: 1 stage: sketch lr: 0.000475
batch 1207 loss: 1.43683 acc: 0.70410 | v_loss: 1.23176 v_acc: 0.70671 |  iteration: 8666 teacher: 1 sta

batch 21 loss: 1.48392 acc: 0.69889 | v_loss: 1.18737 v_acc: 0.74544 |  iteration: 8723 teacher: 0 stage: sketch lr: 0.000473
batch 22 loss: 1.46044 acc: 0.70117 | v_loss: 1.27559 v_acc: 0.70280 |  iteration: 8724 teacher: 1 stage: sketch lr: 0.000473
batch 23 loss: 1.43262 acc: 0.70475 | v_loss: 1.52321 v_acc: 0.70280 |  iteration: 8725 teacher: 0 stage: sketch lr: 0.000473
batch 24 loss: 1.54240 acc: 0.70215 | v_loss: 1.25386 v_acc: 0.71647 |  iteration: 8726 teacher: 0 stage: sketch lr: 0.000473
batch 25 loss: 1.52768 acc: 0.69661 | v_loss: 1.33873 v_acc: 0.71191 |  iteration: 8727 teacher: 0 stage: sketch lr: 0.000473
batch 26 loss: 1.35519 acc: 0.72233 | v_loss: 1.37540 v_acc: 0.69076 |  iteration: 8728 teacher: 0 stage: sketch lr: 0.000473
batch 27 loss: 1.44075 acc: 0.70117 | v_loss: 1.33306 v_acc: 0.70898 |  iteration: 8729 teacher: 1 stage: sketch lr: 0.000473
batch 28 loss: 1.43673 acc: 0.71126 | v_loss: 1.38712 v_acc: 0.69206 |  iteration: 8730 teacher: 0 stage: sketch lr: 0

batch 87 loss: 1.53366 acc: 0.68750 | v_loss: 1.43323 v_acc: 0.70475 |  iteration: 8789 teacher: 0 stage: sketch lr: 0.000471
batch 88 loss: 1.45063 acc: 0.69434 | v_loss: 1.47211 v_acc: 0.70280 |  iteration: 8790 teacher: 1 stage: sketch lr: 0.000471
batch 89 loss: 1.44718 acc: 0.70280 | v_loss: 1.53397 v_acc: 0.68783 |  iteration: 8791 teacher: 1 stage: sketch lr: 0.000471
batch 90 loss: 1.42911 acc: 0.70768 | v_loss: 1.47398 v_acc: 0.70540 |  iteration: 8792 teacher: 0 stage: sketch lr: 0.000471
batch 91 loss: 1.44253 acc: 0.70345 | v_loss: 1.44575 v_acc: 0.70540 |  iteration: 8793 teacher: 1 stage: sketch lr: 0.000471
batch 92 loss: 1.44214 acc: 0.69922 | v_loss: 1.42747 v_acc: 0.70475 |  iteration: 8794 teacher: 1 stage: sketch lr: 0.000471
batch 93 loss: 1.44853 acc: 0.70573 | v_loss: 1.41262 v_acc: 0.70312 |  iteration: 8795 teacher: 1 stage: sketch lr: 0.000471
batch 94 loss: 1.36587 acc: 0.70768 | v_loss: 1.27990 v_acc: 0.71257 |  iteration: 8796 teacher: 0 stage: sketch lr: 0

batch 152 loss: 1.44134 acc: 0.70801 | v_loss: 1.25268 v_acc: 0.72559 |  iteration: 8854 teacher: 0 stage: sketch lr: 0.000470
batch 153 loss: 1.40669 acc: 0.70475 | v_loss: 1.34450 v_acc: 0.70540 |  iteration: 8855 teacher: 1 stage: sketch lr: 0.000470
batch 154 loss: 1.43191 acc: 0.70020 | v_loss: 1.48139 v_acc: 0.69596 |  iteration: 8856 teacher: 1 stage: sketch lr: 0.000470
batch 155 loss: 1.32100 acc: 0.71615 | v_loss: 1.29533 v_acc: 0.71582 |  iteration: 8857 teacher: 1 stage: sketch lr: 0.000470
batch 156 loss: 1.46182 acc: 0.69857 | v_loss: 1.43422 v_acc: 0.72949 |  iteration: 8858 teacher: 1 stage: sketch lr: 0.000470
batch 157 loss: 1.38894 acc: 0.70215 | v_loss: 1.65690 v_acc: 0.69466 |  iteration: 8859 teacher: 1 stage: sketch lr: 0.000470
batch 158 loss: 1.43780 acc: 0.69889 | v_loss: 1.54435 v_acc: 0.70117 |  iteration: 8860 teacher: 0 stage: sketch lr: 0.000470
batch 159 loss: 1.48757 acc: 0.69401 | v_loss: 1.31502 v_acc: 0.72396 |  iteration: 8861 teacher: 0 stage: sket

batch 217 loss: 1.35550 acc: 0.70898 | v_loss: 1.32068 v_acc: 0.72135 |  iteration: 8919 teacher: 0 stage: sketch lr: 0.000468
batch 218 loss: 1.40225 acc: 0.70964 | v_loss: 1.57687 v_acc: 0.68620 |  iteration: 8920 teacher: 1 stage: sketch lr: 0.000468
batch 219 loss: 1.34056 acc: 0.70638 | v_loss: 1.43232 v_acc: 0.69987 |  iteration: 8921 teacher: 0 stage: sketch lr: 0.000468
batch 220 loss: 1.36705 acc: 0.71029 | v_loss: 1.52547 v_acc: 0.68913 |  iteration: 8922 teacher: 1 stage: sketch lr: 0.000468
batch 221 loss: 1.43598 acc: 0.70540 | v_loss: 1.38102 v_acc: 0.69759 |  iteration: 8923 teacher: 0 stage: sketch lr: 0.000468
batch 222 loss: 1.48635 acc: 0.69596 | v_loss: 1.33996 v_acc: 0.70312 |  iteration: 8924 teacher: 0 stage: sketch lr: 0.000468
batch 223 loss: 1.45554 acc: 0.70605 | v_loss: 1.35080 v_acc: 0.70020 |  iteration: 8925 teacher: 1 stage: sketch lr: 0.000468
batch 224 loss: 1.34716 acc: 0.70964 | v_loss: 1.35158 v_acc: 0.71484 |  iteration: 8926 teacher: 0 stage: sket

batch 282 loss: 1.56266 acc: 0.69629 | v_loss: 1.37240 v_acc: 0.69368 |  iteration: 8984 teacher: 1 stage: sketch lr: 0.000466
batch 283 loss: 1.31646 acc: 0.70475 | v_loss: 1.26843 v_acc: 0.70573 |  iteration: 8985 teacher: 0 stage: sketch lr: 0.000466
batch 284 loss: 1.37303 acc: 0.70866 | v_loss: 1.27079 v_acc: 0.70215 |  iteration: 8986 teacher: 0 stage: sketch lr: 0.000466
batch 285 loss: 1.46122 acc: 0.69531 | v_loss: 1.24915 v_acc: 0.73340 |  iteration: 8987 teacher: 1 stage: sketch lr: 0.000466
batch 286 loss: 1.42680 acc: 0.70247 | v_loss: 1.28492 v_acc: 0.71647 |  iteration: 8988 teacher: 0 stage: sketch lr: 0.000466
batch 287 loss: 1.44369 acc: 0.70215 | v_loss: 1.36957 v_acc: 0.74089 |  iteration: 8989 teacher: 1 stage: sketch lr: 0.000466
batch 288 loss: 1.54188 acc: 0.69206 | v_loss: 1.28631 v_acc: 0.71484 |  iteration: 8990 teacher: 0 stage: sketch lr: 0.000466
batch 289 loss: 1.42102 acc: 0.70085 | v_loss: 1.30330 v_acc: 0.72168 |  iteration: 8991 teacher: 0 stage: sket

batch 347 loss: 1.47011 acc: 0.68750 | v_loss: 1.35139 v_acc: 0.71842 |  iteration: 9049 teacher: 1 stage: sketch lr: 0.000465
batch 348 loss: 1.41743 acc: 0.69954 | v_loss: 1.37678 v_acc: 0.70312 |  iteration: 9050 teacher: 0 stage: sketch lr: 0.000465
batch 349 loss: 1.48571 acc: 0.70768 | v_loss: 1.31365 v_acc: 0.72201 |  iteration: 9051 teacher: 0 stage: sketch lr: 0.000465
batch 350 loss: 1.42802 acc: 0.70605 | v_loss: 1.31779 v_acc: 0.72005 |  iteration: 9052 teacher: 0 stage: sketch lr: 0.000465
batch 351 loss: 1.44712 acc: 0.70508 | v_loss: 1.55290 v_acc: 0.69043 |  iteration: 9053 teacher: 1 stage: sketch lr: 0.000464
batch 352 loss: 1.45878 acc: 0.69792 | v_loss: 1.34409 v_acc: 0.70833 |  iteration: 9054 teacher: 1 stage: sketch lr: 0.000464
batch 353 loss: 1.47610 acc: 0.70605 | v_loss: 1.29974 v_acc: 0.71582 |  iteration: 9055 teacher: 0 stage: sketch lr: 0.000464
batch 354 loss: 1.44619 acc: 0.70052 | v_loss: 1.29626 v_acc: 0.71712 |  iteration: 9056 teacher: 1 stage: sket

batch 412 loss: 1.36137 acc: 0.70247 | v_loss: 1.43852 v_acc: 0.69434 |  iteration: 9114 teacher: 1 stage: sketch lr: 0.000463
batch 413 loss: 1.41250 acc: 0.69857 | v_loss: 1.44549 v_acc: 0.71452 |  iteration: 9115 teacher: 0 stage: sketch lr: 0.000463
batch 414 loss: 1.40082 acc: 0.70117 | v_loss: 1.30252 v_acc: 0.71647 |  iteration: 9116 teacher: 0 stage: sketch lr: 0.000463
batch 415 loss: 1.38863 acc: 0.70215 | v_loss: 1.26849 v_acc: 0.72526 |  iteration: 9117 teacher: 1 stage: sketch lr: 0.000463
batch 416 loss: 1.39516 acc: 0.70996 | v_loss: 1.38160 v_acc: 0.71647 |  iteration: 9118 teacher: 1 stage: sketch lr: 0.000463
batch 417 loss: 1.45281 acc: 0.69954 | v_loss: 1.42137 v_acc: 0.70052 |  iteration: 9119 teacher: 1 stage: sketch lr: 0.000463
batch 418 loss: 1.42681 acc: 0.69336 | v_loss: 1.42789 v_acc: 0.70833 |  iteration: 9120 teacher: 0 stage: sketch lr: 0.000463
batch 419 loss: 1.51509 acc: 0.69661 | v_loss: 1.22934 v_acc: 0.72591 |  iteration: 9121 teacher: 0 stage: sket

batch 477 loss: 1.44343 acc: 0.71061 | v_loss: 1.26836 v_acc: 0.70280 |  iteration: 9179 teacher: 1 stage: sketch lr: 0.000461
batch 478 loss: 1.44072 acc: 0.70703 | v_loss: 1.53161 v_acc: 0.70247 |  iteration: 9180 teacher: 1 stage: sketch lr: 0.000461
batch 479 loss: 1.38323 acc: 0.70638 | v_loss: 1.25955 v_acc: 0.70833 |  iteration: 9181 teacher: 0 stage: sketch lr: 0.000461
batch 480 loss: 1.31576 acc: 0.71745 | v_loss: 1.33692 v_acc: 0.71354 |  iteration: 9182 teacher: 0 stage: sketch lr: 0.000461
batch 481 loss: 1.44531 acc: 0.70605 | v_loss: 1.37010 v_acc: 0.69368 |  iteration: 9183 teacher: 0 stage: sketch lr: 0.000461
batch 482 loss: 1.40523 acc: 0.70736 | v_loss: 1.30885 v_acc: 0.71842 |  iteration: 9184 teacher: 0 stage: sketch lr: 0.000461
batch 483 loss: 1.47052 acc: 0.70182 | v_loss: 1.37956 v_acc: 0.70215 |  iteration: 9185 teacher: 0 stage: sketch lr: 0.000461
batch 484 loss: 1.45764 acc: 0.70280 | v_loss: 1.48114 v_acc: 0.72005 |  iteration: 9186 teacher: 0 stage: sket

batch 542 loss: 1.48779 acc: 0.69727 | v_loss: 1.45014 v_acc: 0.70508 |  iteration: 9244 teacher: 0 stage: sketch lr: 0.000460
batch 543 loss: 1.43542 acc: 0.69954 | v_loss: 1.47759 v_acc: 0.70573 |  iteration: 9245 teacher: 0 stage: sketch lr: 0.000460
batch 544 loss: 1.34847 acc: 0.70117 | v_loss: 1.51912 v_acc: 0.68978 |  iteration: 9246 teacher: 0 stage: sketch lr: 0.000460
batch 545 loss: 1.43480 acc: 0.70378 | v_loss: 1.46540 v_acc: 0.70540 |  iteration: 9247 teacher: 0 stage: sketch lr: 0.000460
batch 546 loss: 1.47680 acc: 0.70312 | v_loss: 1.43581 v_acc: 0.70410 |  iteration: 9248 teacher: 1 stage: sketch lr: 0.000460
batch 547 loss: 1.39438 acc: 0.70443 | v_loss: 1.40857 v_acc: 0.70768 |  iteration: 9249 teacher: 1 stage: sketch lr: 0.000460
batch 548 loss: 1.55852 acc: 0.69173 | v_loss: 1.41155 v_acc: 0.70312 |  iteration: 9250 teacher: 0 stage: sketch lr: 0.000460
batch 549 loss: 1.37193 acc: 0.70508 | v_loss: 1.28561 v_acc: 0.71257 |  iteration: 9251 teacher: 1 stage: sket

batch 607 loss: 1.53587 acc: 0.68913 | v_loss: 1.22152 v_acc: 0.72656 |  iteration: 9309 teacher: 0 stage: sketch lr: 0.000458
batch 608 loss: 1.45755 acc: 0.70605 | v_loss: 1.28664 v_acc: 0.70703 |  iteration: 9310 teacher: 0 stage: sketch lr: 0.000458
batch 609 loss: 1.44307 acc: 0.70312 | v_loss: 1.45835 v_acc: 0.69596 |  iteration: 9311 teacher: 1 stage: sketch lr: 0.000458
batch 610 loss: 1.43367 acc: 0.71224 | v_loss: 1.29228 v_acc: 0.71484 |  iteration: 9312 teacher: 1 stage: sketch lr: 0.000458
batch 611 loss: 1.61002 acc: 0.68620 | v_loss: 1.44420 v_acc: 0.72949 |  iteration: 9313 teacher: 1 stage: sketch lr: 0.000458
batch 612 loss: 1.31057 acc: 0.73503 | v_loss: 1.65083 v_acc: 0.69466 |  iteration: 9314 teacher: 1 stage: sketch lr: 0.000458
batch 613 loss: 1.42975 acc: 0.71224 | v_loss: 1.52369 v_acc: 0.70410 |  iteration: 9315 teacher: 0 stage: sketch lr: 0.000458
batch 614 loss: 1.44898 acc: 0.70638 | v_loss: 1.30887 v_acc: 0.72005 |  iteration: 9316 teacher: 1 stage: sket

batch 672 loss: 1.48223 acc: 0.69694 | v_loss: 1.31660 v_acc: 0.72103 |  iteration: 9374 teacher: 0 stage: sketch lr: 0.000456
batch 673 loss: 1.43852 acc: 0.70736 | v_loss: 1.58521 v_acc: 0.68620 |  iteration: 9375 teacher: 1 stage: sketch lr: 0.000456
batch 674 loss: 1.53991 acc: 0.69141 | v_loss: 1.42898 v_acc: 0.69987 |  iteration: 9376 teacher: 1 stage: sketch lr: 0.000456
batch 675 loss: 1.37539 acc: 0.70833 | v_loss: 1.52605 v_acc: 0.68913 |  iteration: 9377 teacher: 1 stage: sketch lr: 0.000456
batch 676 loss: 1.52803 acc: 0.68783 | v_loss: 1.37053 v_acc: 0.69759 |  iteration: 9378 teacher: 1 stage: sketch lr: 0.000456
batch 677 loss: 1.41634 acc: 0.70182 | v_loss: 1.32536 v_acc: 0.70378 |  iteration: 9379 teacher: 0 stage: sketch lr: 0.000456
batch 678 loss: 1.34359 acc: 0.70996 | v_loss: 1.33595 v_acc: 0.70215 |  iteration: 9380 teacher: 1 stage: sketch lr: 0.000456
batch 679 loss: 1.40356 acc: 0.69889 | v_loss: 1.33370 v_acc: 0.71615 |  iteration: 9381 teacher: 1 stage: sket

batch 737 loss: 1.40703 acc: 0.70475 | v_loss: 1.35619 v_acc: 0.70247 |  iteration: 9439 teacher: 0 stage: sketch lr: 0.000455
batch 738 loss: 1.41641 acc: 0.70410 | v_loss: 1.29143 v_acc: 0.70573 |  iteration: 9440 teacher: 1 stage: sketch lr: 0.000455
batch 739 loss: 1.49801 acc: 0.70215 | v_loss: 1.27051 v_acc: 0.70215 |  iteration: 9441 teacher: 1 stage: sketch lr: 0.000455
batch 740 loss: 1.48942 acc: 0.69531 | v_loss: 1.24474 v_acc: 0.73340 |  iteration: 9442 teacher: 0 stage: sketch lr: 0.000455
batch 741 loss: 1.52794 acc: 0.69889 | v_loss: 1.29560 v_acc: 0.71647 |  iteration: 9443 teacher: 1 stage: sketch lr: 0.000455
batch 742 loss: 1.42096 acc: 0.70931 | v_loss: 1.32396 v_acc: 0.74089 |  iteration: 9444 teacher: 1 stage: sketch lr: 0.000455
batch 743 loss: 1.48145 acc: 0.70182 | v_loss: 1.26151 v_acc: 0.72005 |  iteration: 9445 teacher: 0 stage: sketch lr: 0.000455
batch 744 loss: 1.50216 acc: 0.70378 | v_loss: 1.31076 v_acc: 0.71582 |  iteration: 9446 teacher: 1 stage: sket

batch 802 loss: 1.47222 acc: 0.69206 | v_loss: 1.35154 v_acc: 0.71680 |  iteration: 9504 teacher: 1 stage: sketch lr: 0.000453
batch 803 loss: 1.38461 acc: 0.70182 | v_loss: 1.37930 v_acc: 0.70345 |  iteration: 9505 teacher: 0 stage: sketch lr: 0.000453
batch 804 loss: 1.49704 acc: 0.69531 | v_loss: 1.31499 v_acc: 0.72201 |  iteration: 9506 teacher: 0 stage: sketch lr: 0.000453
batch 805 loss: 1.41549 acc: 0.70443 | v_loss: 1.32676 v_acc: 0.72005 |  iteration: 9507 teacher: 1 stage: sketch lr: 0.000453
batch 806 loss: 1.46959 acc: 0.70117 | v_loss: 1.50599 v_acc: 0.69043 |  iteration: 9508 teacher: 0 stage: sketch lr: 0.000453
batch 807 loss: 1.49191 acc: 0.70312 | v_loss: 1.35056 v_acc: 0.70833 |  iteration: 9509 teacher: 1 stage: sketch lr: 0.000453
batch 808 loss: 1.53427 acc: 0.69857 | v_loss: 1.29942 v_acc: 0.71582 |  iteration: 9510 teacher: 0 stage: sketch lr: 0.000453
batch 809 loss: 1.45177 acc: 0.70378 | v_loss: 1.29502 v_acc: 0.72201 |  iteration: 9511 teacher: 0 stage: sket

batch 867 loss: 1.48064 acc: 0.69238 | v_loss: 1.43761 v_acc: 0.70150 |  iteration: 9569 teacher: 0 stage: sketch lr: 0.000452
batch 868 loss: 1.57079 acc: 0.69271 | v_loss: 1.41798 v_acc: 0.71549 |  iteration: 9570 teacher: 1 stage: sketch lr: 0.000452
batch 869 loss: 1.44239 acc: 0.70866 | v_loss: 1.29054 v_acc: 0.72038 |  iteration: 9571 teacher: 1 stage: sketch lr: 0.000452
batch 870 loss: 1.38030 acc: 0.69792 | v_loss: 1.25411 v_acc: 0.72819 |  iteration: 9572 teacher: 0 stage: sketch lr: 0.000452
batch 871 loss: 1.46133 acc: 0.69727 | v_loss: 1.37079 v_acc: 0.71615 |  iteration: 9573 teacher: 0 stage: sketch lr: 0.000452
batch 872 loss: 1.55351 acc: 0.69531 | v_loss: 1.41300 v_acc: 0.70345 |  iteration: 9574 teacher: 1 stage: sketch lr: 0.000452
batch 873 loss: 1.40697 acc: 0.71387 | v_loss: 1.42334 v_acc: 0.70410 |  iteration: 9575 teacher: 1 stage: sketch lr: 0.000452
batch 874 loss: 1.49973 acc: 0.69759 | v_loss: 1.24801 v_acc: 0.71419 |  iteration: 9576 teacher: 0 stage: sket

batch 932 loss: 1.49078 acc: 0.69759 | v_loss: 1.27076 v_acc: 0.70573 |  iteration: 9634 teacher: 0 stage: sketch lr: 0.000450
batch 933 loss: 1.44094 acc: 0.69792 | v_loss: 1.53298 v_acc: 0.70443 |  iteration: 9635 teacher: 0 stage: sketch lr: 0.000450
batch 934 loss: 1.34263 acc: 0.71582 | v_loss: 1.24380 v_acc: 0.70833 |  iteration: 9636 teacher: 1 stage: sketch lr: 0.000450
batch 935 loss: 1.42682 acc: 0.70247 | v_loss: 1.32895 v_acc: 0.71257 |  iteration: 9637 teacher: 1 stage: sketch lr: 0.000450
batch 936 loss: 1.34074 acc: 0.71973 | v_loss: 1.36994 v_acc: 0.69076 |  iteration: 9638 teacher: 1 stage: sketch lr: 0.000450
batch 937 loss: 1.40559 acc: 0.70182 | v_loss: 1.30062 v_acc: 0.70898 |  iteration: 9639 teacher: 0 stage: sketch lr: 0.000450
batch 938 loss: 1.47122 acc: 0.70605 | v_loss: 1.37680 v_acc: 0.69206 |  iteration: 9640 teacher: 0 stage: sketch lr: 0.000450
batch 939 loss: 1.45329 acc: 0.70312 | v_loss: 1.48001 v_acc: 0.70931 |  iteration: 9641 teacher: 0 stage: sket

batch 997 loss: 1.48907 acc: 0.69596 | v_loss: 1.44445 v_acc: 0.70508 |  iteration: 9699 teacher: 1 stage: sketch lr: 0.000449
batch 998 loss: 1.62122 acc: 0.68327 | v_loss: 1.47266 v_acc: 0.70768 |  iteration: 9700 teacher: 1 stage: sketch lr: 0.000449
batch 999 loss: 1.40770 acc: 0.71094 | v_loss: 1.51719 v_acc: 0.68783 |  iteration: 9701 teacher: 0 stage: sketch lr: 0.000449
batch 1000 loss: 1.50757 acc: 0.70020 | v_loss: 1.46609 v_acc: 0.70410 |  iteration: 9702 teacher: 1 stage: sketch lr: 0.000449
batch 1001 loss: 1.45882 acc: 0.70247 | v_loss: 1.43972 v_acc: 0.71029 |  iteration: 9703 teacher: 1 stage: sketch lr: 0.000449
batch 1002 loss: 1.57004 acc: 0.68717 | v_loss: 1.42764 v_acc: 0.70508 |  iteration: 9704 teacher: 1 stage: sketch lr: 0.000449
batch 1003 loss: 1.43852 acc: 0.70866 | v_loss: 1.43917 v_acc: 0.70605 |  iteration: 9705 teacher: 0 stage: sketch lr: 0.000449
batch 1004 loss: 1.54844 acc: 0.70671 | v_loss: 1.29632 v_acc: 0.71257 |  iteration: 9706 teacher: 1 stage:

batch 1062 loss: 1.45896 acc: 0.70020 | v_loss: 1.22240 v_acc: 0.72559 |  iteration: 9764 teacher: 1 stage: sketch lr: 0.000447
batch 1063 loss: 1.40698 acc: 0.70020 | v_loss: 1.28728 v_acc: 0.71126 |  iteration: 9765 teacher: 0 stage: sketch lr: 0.000447
batch 1064 loss: 1.60269 acc: 0.69076 | v_loss: 1.46604 v_acc: 0.70671 |  iteration: 9766 teacher: 0 stage: sketch lr: 0.000447
batch 1065 loss: 1.30799 acc: 0.71615 | v_loss: 1.27958 v_acc: 0.71452 |  iteration: 9767 teacher: 0 stage: sketch lr: 0.000447
batch 1066 loss: 1.48042 acc: 0.70443 | v_loss: 1.43793 v_acc: 0.71647 |  iteration: 9768 teacher: 0 stage: sketch lr: 0.000447
batch 1067 loss: 1.44179 acc: 0.70736 | v_loss: 1.64496 v_acc: 0.69466 |  iteration: 9769 teacher: 0 stage: sketch lr: 0.000447
batch 1068 loss: 1.48754 acc: 0.69499 | v_loss: 1.51198 v_acc: 0.70410 |  iteration: 9770 teacher: 0 stage: sketch lr: 0.000447
batch 1069 loss: 1.47446 acc: 0.69792 | v_loss: 1.29721 v_acc: 0.72005 |  iteration: 9771 teacher: 0 sta

batch 1127 loss: 1.35480 acc: 0.70931 | v_loss: 1.31128 v_acc: 0.72103 |  iteration: 9829 teacher: 1 stage: sketch lr: 0.000446
batch 1128 loss: 1.49160 acc: 0.69824 | v_loss: 1.57238 v_acc: 0.69043 |  iteration: 9830 teacher: 0 stage: sketch lr: 0.000446
batch 1129 loss: 1.46484 acc: 0.70931 | v_loss: 1.43151 v_acc: 0.70215 |  iteration: 9831 teacher: 1 stage: sketch lr: 0.000446
batch 1130 loss: 1.39182 acc: 0.71257 | v_loss: 1.51952 v_acc: 0.68913 |  iteration: 9832 teacher: 1 stage: sketch lr: 0.000446
batch 1131 loss: 1.51165 acc: 0.69954 | v_loss: 1.39241 v_acc: 0.69759 |  iteration: 9833 teacher: 0 stage: sketch lr: 0.000446
batch 1132 loss: 1.40204 acc: 0.70443 | v_loss: 1.33091 v_acc: 0.70215 |  iteration: 9834 teacher: 0 stage: sketch lr: 0.000446
batch 1133 loss: 1.43186 acc: 0.70117 | v_loss: 1.35116 v_acc: 0.70020 |  iteration: 9835 teacher: 1 stage: sketch lr: 0.000446
batch 1134 loss: 1.42051 acc: 0.70931 | v_loss: 1.34850 v_acc: 0.71484 |  iteration: 9836 teacher: 0 sta

batch 1192 loss: 1.47564 acc: 0.69499 | v_loss: 1.38597 v_acc: 0.69727 |  iteration: 9894 teacher: 1 stage: sketch lr: 0.000444
batch 1193 loss: 1.43457 acc: 0.69922 | v_loss: 1.26584 v_acc: 0.71126 |  iteration: 9895 teacher: 0 stage: sketch lr: 0.000444
batch 1194 loss: 1.48664 acc: 0.69824 | v_loss: 1.23555 v_acc: 0.70345 |  iteration: 9896 teacher: 0 stage: sketch lr: 0.000444
batch 1195 loss: 1.50872 acc: 0.69206 | v_loss: 1.26930 v_acc: 0.73503 |  iteration: 9897 teacher: 0 stage: sketch lr: 0.000444
batch 1196 loss: 1.44406 acc: 0.70671 | v_loss: 1.27920 v_acc: 0.72233 |  iteration: 9898 teacher: 0 stage: sketch lr: 0.000444
batch 1197 loss: 1.57536 acc: 0.69434 | v_loss: 1.36344 v_acc: 0.74089 |  iteration: 9899 teacher: 1 stage: sketch lr: 0.000444
batch 1198 loss: 1.40373 acc: 0.70085 | v_loss: 1.27211 v_acc: 0.72005 |  iteration: 9900 teacher: 1 stage: sketch lr: 0.000444
batch 1199 loss: 1.46166 acc: 0.70020 | v_loss: 1.30521 v_acc: 0.71582 |  iteration: 9901 teacher: 0 sta

batch 13 loss: 1.53403 acc: 0.69792 | v_loss: 1.27657 v_acc: 0.72168 |  iteration: 9958 teacher: 1 stage: sketch lr: 0.000443
batch 14 loss: 1.54843 acc: 0.69434 | v_loss: 1.31987 v_acc: 0.74089 |  iteration: 9959 teacher: 1 stage: sketch lr: 0.000443
batch 15 loss: 1.36952 acc: 0.71061 | v_loss: 1.25929 v_acc: 0.72005 |  iteration: 9960 teacher: 1 stage: sketch lr: 0.000443
batch 16 loss: 1.54674 acc: 0.69108 | v_loss: 1.30246 v_acc: 0.71582 |  iteration: 9961 teacher: 1 stage: sketch lr: 0.000443
batch 17 loss: 1.43229 acc: 0.70703 | v_loss: 1.41244 v_acc: 0.71224 |  iteration: 9962 teacher: 1 stage: sketch lr: 0.000443
batch 18 loss: 1.48946 acc: 0.69661 | v_loss: 1.38849 v_acc: 0.72038 |  iteration: 9963 teacher: 0 stage: sketch lr: 0.000443
batch 19 loss: 1.46774 acc: 0.70638 | v_loss: 1.48652 v_acc: 0.69661 |  iteration: 9964 teacher: 0 stage: sketch lr: 0.000443
batch 20 loss: 1.56256 acc: 0.69987 | v_loss: 1.42090 v_acc: 0.71745 |  iteration: 9965 teacher: 0 stage: sketch lr: 0

batch 78 loss: 1.47886 acc: 0.69629 | v_loss: 1.51466 v_acc: 0.69043 |  iteration: 10023 teacher: 1 stage: sketch lr: 0.000441
batch 79 loss: 1.46137 acc: 0.69401 | v_loss: 1.34889 v_acc: 0.70833 |  iteration: 10024 teacher: 0 stage: sketch lr: 0.000441
batch 80 loss: 1.44779 acc: 0.70280 | v_loss: 1.29526 v_acc: 0.71322 |  iteration: 10025 teacher: 0 stage: sketch lr: 0.000441
batch 81 loss: 1.41103 acc: 0.70508 | v_loss: 1.29979 v_acc: 0.72201 |  iteration: 10026 teacher: 1 stage: sketch lr: 0.000441
batch 82 loss: 1.45368 acc: 0.70540 | v_loss: 1.42832 v_acc: 0.70768 |  iteration: 10027 teacher: 0 stage: sketch lr: 0.000441
batch 83 loss: 1.46419 acc: 0.69238 | v_loss: 1.32625 v_acc: 0.73145 |  iteration: 10028 teacher: 1 stage: sketch lr: 0.000441
batch 84 loss: 1.44595 acc: 0.70573 | v_loss: 1.54507 v_acc: 0.71615 |  iteration: 10029 teacher: 1 stage: sketch lr: 0.000441
batch 85 loss: 1.40196 acc: 0.70052 | v_loss: 1.28882 v_acc: 0.69759 |  iteration: 10030 teacher: 0 stage: sket

batch 143 loss: 1.63121 acc: 0.69173 | v_loss: 1.49224 v_acc: 0.71354 |  iteration: 10088 teacher: 0 stage: sketch lr: 0.000440
batch 144 loss: 1.46492 acc: 0.71029 | v_loss: 1.44584 v_acc: 0.70378 |  iteration: 10089 teacher: 0 stage: sketch lr: 0.000440
batch 145 loss: 1.59469 acc: 0.67773 | v_loss: 1.42678 v_acc: 0.70475 |  iteration: 10090 teacher: 0 stage: sketch lr: 0.000440
batch 146 loss: 1.41804 acc: 0.70410 | v_loss: 1.24011 v_acc: 0.71484 |  iteration: 10091 teacher: 0 stage: sketch lr: 0.000440
batch 147 loss: 1.43458 acc: 0.69987 | v_loss: 1.40267 v_acc: 0.72884 |  iteration: 10092 teacher: 0 stage: sketch lr: 0.000440
batch 148 loss: 1.49073 acc: 0.69596 | v_loss: 1.47730 v_acc: 0.69824 |  iteration: 10093 teacher: 1 stage: sketch lr: 0.000440
batch 149 loss: 1.42195 acc: 0.70312 | v_loss: 1.42980 v_acc: 0.72005 |  iteration: 10094 teacher: 1 stage: sketch lr: 0.000440
batch 150 loss: 1.38636 acc: 0.70020 | v_loss: 1.26066 v_acc: 0.71908 |  iteration: 10095 teacher: 1 sta

batch 208 loss: 1.37589 acc: 0.71322 | v_loss: 1.37086 v_acc: 0.69076 |  iteration: 10153 teacher: 1 stage: sketch lr: 0.000439
batch 209 loss: 1.43283 acc: 0.70475 | v_loss: 1.31757 v_acc: 0.70898 |  iteration: 10154 teacher: 0 stage: sketch lr: 0.000439
batch 210 loss: 1.35279 acc: 0.70312 | v_loss: 1.37344 v_acc: 0.69206 |  iteration: 10155 teacher: 0 stage: sketch lr: 0.000439
batch 211 loss: 1.44193 acc: 0.70768 | v_loss: 1.47321 v_acc: 0.70931 |  iteration: 10156 teacher: 0 stage: sketch lr: 0.000439
batch 212 loss: 1.49283 acc: 0.69857 | v_loss: 1.31822 v_acc: 0.72689 |  iteration: 10157 teacher: 0 stage: sketch lr: 0.000439
batch 213 loss: 1.38363 acc: 0.71094 | v_loss: 1.44305 v_acc: 0.70540 |  iteration: 10158 teacher: 0 stage: sketch lr: 0.000438
batch 214 loss: 1.48881 acc: 0.70443 | v_loss: 1.39057 v_acc: 0.69889 |  iteration: 10159 teacher: 1 stage: sketch lr: 0.000438
batch 215 loss: 1.40867 acc: 0.70801 | v_loss: 1.31912 v_acc: 0.70801 |  iteration: 10160 teacher: 1 sta

batch 273 loss: 1.41538 acc: 0.70638 | v_loss: 1.42722 v_acc: 0.70085 |  iteration: 10218 teacher: 0 stage: sketch lr: 0.000437
batch 274 loss: 1.47980 acc: 0.71257 | v_loss: 1.40141 v_acc: 0.70931 |  iteration: 10219 teacher: 0 stage: sketch lr: 0.000437
batch 275 loss: 1.43386 acc: 0.70052 | v_loss: 1.40795 v_acc: 0.70703 |  iteration: 10220 teacher: 0 stage: sketch lr: 0.000437
batch 276 loss: 1.40378 acc: 0.69987 | v_loss: 1.26622 v_acc: 0.71875 |  iteration: 10221 teacher: 0 stage: sketch lr: 0.000437
batch 277 loss: 1.43409 acc: 0.70866 | v_loss: 1.32596 v_acc: 0.72331 |  iteration: 10222 teacher: 1 stage: sketch lr: 0.000437
batch 278 loss: 1.47442 acc: 0.70312 | v_loss: 1.19367 v_acc: 0.71517 |  iteration: 10223 teacher: 0 stage: sketch lr: 0.000437
batch 279 loss: 1.41083 acc: 0.70573 | v_loss: 1.35249 v_acc: 0.70312 |  iteration: 10224 teacher: 0 stage: sketch lr: 0.000437
batch 280 loss: 1.45010 acc: 0.71159 | v_loss: 1.51300 v_acc: 0.70117 |  iteration: 10225 teacher: 0 sta

batch 338 loss: 1.49148 acc: 0.69727 | v_loss: 1.43578 v_acc: 0.72949 |  iteration: 10283 teacher: 1 stage: sketch lr: 0.000436
batch 339 loss: 1.51207 acc: 0.69987 | v_loss: 1.64010 v_acc: 0.69466 |  iteration: 10284 teacher: 0 stage: sketch lr: 0.000436
batch 340 loss: 1.43804 acc: 0.70378 | v_loss: 1.50808 v_acc: 0.70410 |  iteration: 10285 teacher: 1 stage: sketch lr: 0.000436
batch 341 loss: 1.46655 acc: 0.70247 | v_loss: 1.29719 v_acc: 0.72005 |  iteration: 10286 teacher: 1 stage: sketch lr: 0.000436
batch 342 loss: 1.43512 acc: 0.70378 | v_loss: 1.37235 v_acc: 0.70410 |  iteration: 10287 teacher: 1 stage: sketch lr: 0.000436
batch 343 loss: 1.40824 acc: 0.69889 | v_loss: 1.22290 v_acc: 0.71973 |  iteration: 10288 teacher: 1 stage: sketch lr: 0.000436
batch 344 loss: 1.55319 acc: 0.69661 | v_loss: 1.42197 v_acc: 0.70378 |  iteration: 10289 teacher: 0 stage: sketch lr: 0.000436
batch 345 loss: 1.47066 acc: 0.70378 | v_loss: 1.36447 v_acc: 0.71810 |  iteration: 10290 teacher: 0 sta

batch 403 loss: 1.46729 acc: 0.70312 | v_loss: 1.37114 v_acc: 0.69759 |  iteration: 10348 teacher: 1 stage: sketch lr: 0.000434
batch 404 loss: 1.41268 acc: 0.70833 | v_loss: 1.32652 v_acc: 0.70215 |  iteration: 10349 teacher: 0 stage: sketch lr: 0.000434
batch 405 loss: 1.45916 acc: 0.70573 | v_loss: 1.34809 v_acc: 0.69889 |  iteration: 10350 teacher: 0 stage: sketch lr: 0.000434
batch 406 loss: 1.54672 acc: 0.68750 | v_loss: 1.33678 v_acc: 0.71419 |  iteration: 10351 teacher: 0 stage: sketch lr: 0.000434
batch 407 loss: 1.32968 acc: 0.71387 | v_loss: 1.53908 v_acc: 0.69173 |  iteration: 10352 teacher: 0 stage: sketch lr: 0.000434
batch 408 loss: 1.46044 acc: 0.70020 | v_loss: 1.40035 v_acc: 0.70443 |  iteration: 10353 teacher: 1 stage: sketch lr: 0.000434
batch 409 loss: 1.34438 acc: 0.71354 | v_loss: 1.33698 v_acc: 0.71061 |  iteration: 10354 teacher: 1 stage: sketch lr: 0.000434
batch 410 loss: 1.44646 acc: 0.70182 | v_loss: 1.37934 v_acc: 0.71452 |  iteration: 10355 teacher: 1 sta

batch 468 loss: 1.39832 acc: 0.69987 | v_loss: 1.28409 v_acc: 0.71647 |  iteration: 10413 teacher: 0 stage: sketch lr: 0.000433
batch 469 loss: 1.36160 acc: 0.70801 | v_loss: 1.37693 v_acc: 0.74089 |  iteration: 10414 teacher: 1 stage: sketch lr: 0.000433
batch 470 loss: 1.45219 acc: 0.69727 | v_loss: 1.27090 v_acc: 0.72461 |  iteration: 10415 teacher: 0 stage: sketch lr: 0.000433
batch 471 loss: 1.45230 acc: 0.70247 | v_loss: 1.31023 v_acc: 0.72103 |  iteration: 10416 teacher: 0 stage: sketch lr: 0.000433
batch 472 loss: 1.43110 acc: 0.70247 | v_loss: 1.41678 v_acc: 0.71354 |  iteration: 10417 teacher: 0 stage: sketch lr: 0.000433
batch 473 loss: 1.47170 acc: 0.70443 | v_loss: 1.40698 v_acc: 0.72428 |  iteration: 10418 teacher: 1 stage: sketch lr: 0.000433
batch 474 loss: 1.37623 acc: 0.70020 | v_loss: 1.50802 v_acc: 0.70182 |  iteration: 10419 teacher: 0 stage: sketch lr: 0.000433
batch 475 loss: 1.50956 acc: 0.70085 | v_loss: 1.41446 v_acc: 0.71777 |  iteration: 10420 teacher: 1 sta

batch 533 loss: 1.46255 acc: 0.70833 | v_loss: 1.50585 v_acc: 0.69043 |  iteration: 10478 teacher: 1 stage: sketch lr: 0.000432
batch 534 loss: 1.47094 acc: 0.69792 | v_loss: 1.33617 v_acc: 0.70964 |  iteration: 10479 teacher: 1 stage: sketch lr: 0.000432
batch 535 loss: 1.52141 acc: 0.69792 | v_loss: 1.30340 v_acc: 0.71354 |  iteration: 10480 teacher: 0 stage: sketch lr: 0.000432
batch 536 loss: 1.43888 acc: 0.70508 | v_loss: 1.30332 v_acc: 0.71712 |  iteration: 10481 teacher: 0 stage: sketch lr: 0.000432
batch 537 loss: 1.47270 acc: 0.69466 | v_loss: 1.42935 v_acc: 0.70540 |  iteration: 10482 teacher: 1 stage: sketch lr: 0.000432
batch 538 loss: 1.42749 acc: 0.71061 | v_loss: 1.31070 v_acc: 0.73112 |  iteration: 10483 teacher: 1 stage: sketch lr: 0.000432
batch 539 loss: 1.57687 acc: 0.68490 | v_loss: 1.53252 v_acc: 0.71354 |  iteration: 10484 teacher: 1 stage: sketch lr: 0.000432
batch 540 loss: 1.42255 acc: 0.69922 | v_loss: 1.29115 v_acc: 0.69922 |  iteration: 10485 teacher: 0 sta

batch 598 loss: 1.44341 acc: 0.69857 | v_loss: 1.37837 v_acc: 0.71647 |  iteration: 10543 teacher: 1 stage: sketch lr: 0.000430
batch 599 loss: 1.50769 acc: 0.69531 | v_loss: 1.42876 v_acc: 0.70052 |  iteration: 10544 teacher: 0 stage: sketch lr: 0.000430
batch 600 loss: 1.38256 acc: 0.70768 | v_loss: 1.42673 v_acc: 0.70312 |  iteration: 10545 teacher: 1 stage: sketch lr: 0.000430
batch 601 loss: 1.46875 acc: 0.69336 | v_loss: 1.23690 v_acc: 0.71419 |  iteration: 10546 teacher: 0 stage: sketch lr: 0.000430
batch 602 loss: 1.46576 acc: 0.69368 | v_loss: 1.40359 v_acc: 0.72819 |  iteration: 10547 teacher: 1 stage: sketch lr: 0.000430
batch 603 loss: 1.48686 acc: 0.70671 | v_loss: 1.47172 v_acc: 0.69792 |  iteration: 10548 teacher: 1 stage: sketch lr: 0.000430
batch 604 loss: 1.42016 acc: 0.69661 | v_loss: 1.39194 v_acc: 0.71842 |  iteration: 10549 teacher: 0 stage: sketch lr: 0.000430
batch 605 loss: 1.30606 acc: 0.72201 | v_loss: 1.25765 v_acc: 0.71615 |  iteration: 10550 teacher: 1 sta

batch 663 loss: 1.48965 acc: 0.70573 | v_loss: 1.37190 v_acc: 0.69076 |  iteration: 10608 teacher: 0 stage: sketch lr: 0.000429
batch 664 loss: 1.50046 acc: 0.69238 | v_loss: 1.30659 v_acc: 0.70898 |  iteration: 10609 teacher: 1 stage: sketch lr: 0.000429
batch 665 loss: 1.52855 acc: 0.69401 | v_loss: 1.37067 v_acc: 0.69206 |  iteration: 10610 teacher: 0 stage: sketch lr: 0.000429
batch 666 loss: 1.54628 acc: 0.68815 | v_loss: 1.50420 v_acc: 0.70931 |  iteration: 10611 teacher: 1 stage: sketch lr: 0.000429
batch 667 loss: 1.44831 acc: 0.71322 | v_loss: 1.32732 v_acc: 0.72363 |  iteration: 10612 teacher: 1 stage: sketch lr: 0.000429
batch 668 loss: 1.53955 acc: 0.69434 | v_loss: 1.48318 v_acc: 0.70117 |  iteration: 10613 teacher: 1 stage: sketch lr: 0.000429
batch 669 loss: 1.39781 acc: 0.70638 | v_loss: 1.36189 v_acc: 0.70085 |  iteration: 10614 teacher: 1 stage: sketch lr: 0.000429
batch 670 loss: 1.47104 acc: 0.70540 | v_loss: 1.35568 v_acc: 0.70703 |  iteration: 10615 teacher: 1 sta

batch 728 loss: 1.48804 acc: 0.69434 | v_loss: 1.42870 v_acc: 0.70085 |  iteration: 10673 teacher: 1 stage: sketch lr: 0.000428
batch 729 loss: 1.41779 acc: 0.71159 | v_loss: 1.41864 v_acc: 0.70768 |  iteration: 10674 teacher: 1 stage: sketch lr: 0.000428
batch 730 loss: 1.39003 acc: 0.70801 | v_loss: 1.39648 v_acc: 0.70312 |  iteration: 10675 teacher: 1 stage: sketch lr: 0.000428
batch 731 loss: 1.49338 acc: 0.70052 | v_loss: 1.26517 v_acc: 0.71257 |  iteration: 10676 teacher: 0 stage: sketch lr: 0.000428
batch 732 loss: 1.45674 acc: 0.70540 | v_loss: 1.32785 v_acc: 0.72493 |  iteration: 10677 teacher: 0 stage: sketch lr: 0.000428
batch 733 loss: 1.58748 acc: 0.69043 | v_loss: 1.19926 v_acc: 0.70638 |  iteration: 10678 teacher: 0 stage: sketch lr: 0.000428
batch 734 loss: 1.41148 acc: 0.70508 | v_loss: 1.34636 v_acc: 0.70247 |  iteration: 10679 teacher: 0 stage: sketch lr: 0.000428
batch 735 loss: 1.47290 acc: 0.70801 | v_loss: 1.49633 v_acc: 0.69987 |  iteration: 10680 teacher: 0 sta

batch 793 loss: 1.42430 acc: 0.70085 | v_loss: 1.52492 v_acc: 0.68848 |  iteration: 10738 teacher: 0 stage: sketch lr: 0.000426
batch 794 loss: 1.41607 acc: 0.70866 | v_loss: 1.73041 v_acc: 0.68913 |  iteration: 10739 teacher: 1 stage: sketch lr: 0.000426
batch 795 loss: 1.46083 acc: 0.69792 | v_loss: 1.55912 v_acc: 0.69531 |  iteration: 10740 teacher: 0 stage: sketch lr: 0.000426
batch 796 loss: 1.43528 acc: 0.69889 | v_loss: 1.29684 v_acc: 0.72396 |  iteration: 10741 teacher: 1 stage: sketch lr: 0.000426
batch 797 loss: 1.63175 acc: 0.68132 | v_loss: 1.36531 v_acc: 0.70866 |  iteration: 10742 teacher: 0 stage: sketch lr: 0.000426
batch 798 loss: 1.45826 acc: 0.69824 | v_loss: 1.22401 v_acc: 0.72103 |  iteration: 10743 teacher: 1 stage: sketch lr: 0.000426
batch 799 loss: 1.32754 acc: 0.71029 | v_loss: 1.42670 v_acc: 0.69857 |  iteration: 10744 teacher: 0 stage: sketch lr: 0.000426
batch 800 loss: 1.40653 acc: 0.69499 | v_loss: 1.36312 v_acc: 0.70964 |  iteration: 10745 teacher: 0 sta

batch 858 loss: 1.32056 acc: 0.71875 | v_loss: 1.36982 v_acc: 0.70182 |  iteration: 10803 teacher: 1 stage: sketch lr: 0.000425
batch 859 loss: 1.53683 acc: 0.69271 | v_loss: 1.32463 v_acc: 0.70508 |  iteration: 10804 teacher: 1 stage: sketch lr: 0.000425
batch 860 loss: 1.51490 acc: 0.69759 | v_loss: 1.32913 v_acc: 0.70378 |  iteration: 10805 teacher: 1 stage: sketch lr: 0.000425
batch 861 loss: 1.48483 acc: 0.69759 | v_loss: 1.32579 v_acc: 0.71419 |  iteration: 10806 teacher: 1 stage: sketch lr: 0.000425
batch 862 loss: 1.36362 acc: 0.70247 | v_loss: 1.52604 v_acc: 0.69173 |  iteration: 10807 teacher: 0 stage: sketch lr: 0.000425
batch 863 loss: 1.47605 acc: 0.70703 | v_loss: 1.37169 v_acc: 0.70247 |  iteration: 10808 teacher: 1 stage: sketch lr: 0.000425
batch 864 loss: 1.55103 acc: 0.69466 | v_loss: 1.36046 v_acc: 0.71126 |  iteration: 10809 teacher: 0 stage: sketch lr: 0.000425
batch 865 loss: 1.40502 acc: 0.70801 | v_loss: 1.39208 v_acc: 0.71810 |  iteration: 10810 teacher: 1 sta

batch 923 loss: 1.45038 acc: 0.70117 | v_loss: 1.27719 v_acc: 0.71647 |  iteration: 10868 teacher: 1 stage: sketch lr: 0.000424
batch 924 loss: 1.34748 acc: 0.71517 | v_loss: 1.34123 v_acc: 0.74089 |  iteration: 10869 teacher: 1 stage: sketch lr: 0.000424
batch 925 loss: 1.59665 acc: 0.68392 | v_loss: 1.26382 v_acc: 0.72005 |  iteration: 10870 teacher: 0 stage: sketch lr: 0.000424
batch 926 loss: 1.52537 acc: 0.68848 | v_loss: 1.30778 v_acc: 0.71582 |  iteration: 10871 teacher: 0 stage: sketch lr: 0.000424
batch 927 loss: 1.39864 acc: 0.70768 | v_loss: 1.41945 v_acc: 0.71289 |  iteration: 10872 teacher: 1 stage: sketch lr: 0.000424
batch 928 loss: 1.41055 acc: 0.70247 | v_loss: 1.39251 v_acc: 0.72298 |  iteration: 10873 teacher: 0 stage: sketch lr: 0.000424
batch 929 loss: 1.37496 acc: 0.70605 | v_loss: 1.48368 v_acc: 0.69922 |  iteration: 10874 teacher: 1 stage: sketch lr: 0.000424
batch 930 loss: 1.48760 acc: 0.69531 | v_loss: 1.42923 v_acc: 0.72135 |  iteration: 10875 teacher: 0 sta

batch 988 loss: 1.37378 acc: 0.71419 | v_loss: 1.49942 v_acc: 0.69043 |  iteration: 10933 teacher: 1 stage: sketch lr: 0.000423
batch 989 loss: 1.33434 acc: 0.71159 | v_loss: 1.34031 v_acc: 0.70833 |  iteration: 10934 teacher: 0 stage: sketch lr: 0.000423
batch 990 loss: 1.44165 acc: 0.70508 | v_loss: 1.29056 v_acc: 0.71582 |  iteration: 10935 teacher: 1 stage: sketch lr: 0.000423
batch 991 loss: 1.50399 acc: 0.69368 | v_loss: 1.28381 v_acc: 0.72201 |  iteration: 10936 teacher: 0 stage: sketch lr: 0.000423
batch 992 loss: 1.47060 acc: 0.70475 | v_loss: 1.41937 v_acc: 0.70768 |  iteration: 10937 teacher: 1 stage: sketch lr: 0.000423
batch 993 loss: 1.44208 acc: 0.69694 | v_loss: 1.31215 v_acc: 0.73112 |  iteration: 10938 teacher: 0 stage: sketch lr: 0.000423
batch 994 loss: 1.41588 acc: 0.70182 | v_loss: 1.55574 v_acc: 0.71582 |  iteration: 10939 teacher: 1 stage: sketch lr: 0.000423
batch 995 loss: 1.59431 acc: 0.68620 | v_loss: 1.29723 v_acc: 0.70150 |  iteration: 10940 teacher: 1 sta

batch 1052 loss: 1.43144 acc: 0.70671 | v_loss: 1.24980 v_acc: 0.72526 |  iteration: 10997 teacher: 1 stage: sketch lr: 0.000421
batch 1053 loss: 1.48254 acc: 0.70215 | v_loss: 1.37854 v_acc: 0.71745 |  iteration: 10998 teacher: 0 stage: sketch lr: 0.000421
batch 1054 loss: 1.44590 acc: 0.69466 | v_loss: 1.41849 v_acc: 0.70345 |  iteration: 10999 teacher: 0 stage: sketch lr: 0.000421
batch 1055 loss: 1.46523 acc: 0.69759 | v_loss: 1.43178 v_acc: 0.70410 |  iteration: 11000 teacher: 0 stage: sketch lr: 0.000421
batch 1056 loss: 1.47656 acc: 0.69857 | v_loss: 1.23150 v_acc: 0.71484 |  iteration: 11001 teacher: 0 stage: sketch lr: 0.000421
batch 1057 loss: 1.50864 acc: 0.69173 | v_loss: 1.41040 v_acc: 0.72852 |  iteration: 11002 teacher: 1 stage: sketch lr: 0.000421
batch 1058 loss: 1.51398 acc: 0.69727 | v_loss: 1.48293 v_acc: 0.69694 |  iteration: 11003 teacher: 1 stage: sketch lr: 0.000421
batch 1059 loss: 1.49843 acc: 0.69629 | v_loss: 1.39408 v_acc: 0.72786 |  iteration: 11004 teache

batch 1116 loss: 1.50911 acc: 0.69238 | v_loss: 1.22828 v_acc: 0.71647 |  iteration: 11061 teacher: 0 stage: sketch lr: 0.000420
batch 1117 loss: 1.40512 acc: 0.69857 | v_loss: 1.32656 v_acc: 0.71159 |  iteration: 11062 teacher: 1 stage: sketch lr: 0.000420
batch 1118 loss: 1.39330 acc: 0.70898 | v_loss: 1.35862 v_acc: 0.69336 |  iteration: 11063 teacher: 0 stage: sketch lr: 0.000420
batch 1119 loss: 1.40881 acc: 0.70801 | v_loss: 1.29725 v_acc: 0.71191 |  iteration: 11064 teacher: 1 stage: sketch lr: 0.000420
batch 1120 loss: 1.41391 acc: 0.70085 | v_loss: 1.35501 v_acc: 0.70215 |  iteration: 11065 teacher: 0 stage: sketch lr: 0.000420
batch 1121 loss: 1.46561 acc: 0.70215 | v_loss: 1.48156 v_acc: 0.72005 |  iteration: 11066 teacher: 0 stage: sketch lr: 0.000420
batch 1122 loss: 1.39513 acc: 0.69857 | v_loss: 1.32887 v_acc: 0.72786 |  iteration: 11067 teacher: 0 stage: sketch lr: 0.000420
batch 1123 loss: 1.48214 acc: 0.70215 | v_loss: 1.46534 v_acc: 0.70540 |  iteration: 11068 teache

batch 1180 loss: 1.44423 acc: 0.70312 | v_loss: 1.46588 v_acc: 0.70573 |  iteration: 11125 teacher: 0 stage: sketch lr: 0.000419
batch 1181 loss: 1.50458 acc: 0.69759 | v_loss: 1.51617 v_acc: 0.68978 |  iteration: 11126 teacher: 1 stage: sketch lr: 0.000419
batch 1182 loss: 1.38955 acc: 0.70540 | v_loss: 1.46329 v_acc: 0.70540 |  iteration: 11127 teacher: 1 stage: sketch lr: 0.000419
batch 1183 loss: 1.36098 acc: 0.71257 | v_loss: 1.43404 v_acc: 0.70540 |  iteration: 11128 teacher: 1 stage: sketch lr: 0.000419
batch 1184 loss: 1.38059 acc: 0.71387 | v_loss: 1.41684 v_acc: 0.70768 |  iteration: 11129 teacher: 1 stage: sketch lr: 0.000419
batch 1185 loss: 1.34090 acc: 0.72135 | v_loss: 1.40362 v_acc: 0.70475 |  iteration: 11130 teacher: 0 stage: sketch lr: 0.000419
batch 1186 loss: 1.38720 acc: 0.70638 | v_loss: 1.26415 v_acc: 0.71354 |  iteration: 11131 teacher: 1 stage: sketch lr: 0.000419
batch 1187 loss: 1.48415 acc: 0.70085 | v_loss: 1.33588 v_acc: 0.72461 |  iteration: 11132 teache

epoch: 9
__________________________________________
batch 0 loss: 1.45894 acc: 0.69889 | v_loss: 1.43612 v_acc: 0.70410 |  iteration: 11188 teacher: 0 stage: sketch lr: 0.000418
batch 1 loss: 1.47216 acc: 0.70150 | v_loss: 1.42469 v_acc: 0.70768 |  iteration: 11189 teacher: 1 stage: sketch lr: 0.000418
batch 2 loss: 1.40967 acc: 0.69141 | v_loss: 1.40909 v_acc: 0.70475 |  iteration: 11190 teacher: 0 stage: sketch lr: 0.000418
batch 3 loss: 1.41004 acc: 0.70898 | v_loss: 1.26736 v_acc: 0.71354 |  iteration: 11191 teacher: 0 stage: sketch lr: 0.000418
batch 4 loss: 1.38835 acc: 0.70996 | v_loss: 1.32814 v_acc: 0.72461 |  iteration: 11192 teacher: 1 stage: sketch lr: 0.000418
batch 5 loss: 1.42426 acc: 0.70540 | v_loss: 1.17698 v_acc: 0.71615 |  iteration: 11193 teacher: 1 stage: sketch lr: 0.000418
batch 6 loss: 1.47641 acc: 0.70117 | v_loss: 1.34330 v_acc: 0.71126 |  iteration: 11194 teacher: 0 stage: sketch lr: 0.000418
batch 7 loss: 1.43269 acc: 0.70443 | v_loss: 1.51757 v_acc: 0.6995

batch 65 loss: 1.45481 acc: 0.70117 | v_loss: 1.50189 v_acc: 0.70475 |  iteration: 11253 teacher: 1 stage: sketch lr: 0.000417
batch 66 loss: 1.47896 acc: 0.69141 | v_loss: 1.68713 v_acc: 0.69336 |  iteration: 11254 teacher: 1 stage: sketch lr: 0.000417
batch 67 loss: 1.50270 acc: 0.68359 | v_loss: 1.54068 v_acc: 0.69727 |  iteration: 11255 teacher: 1 stage: sketch lr: 0.000417
batch 68 loss: 1.39849 acc: 0.70312 | v_loss: 1.29765 v_acc: 0.72396 |  iteration: 11256 teacher: 0 stage: sketch lr: 0.000417
batch 69 loss: 1.47452 acc: 0.69922 | v_loss: 1.36717 v_acc: 0.70996 |  iteration: 11257 teacher: 1 stage: sketch lr: 0.000417
batch 70 loss: 1.44254 acc: 0.70671 | v_loss: 1.22455 v_acc: 0.71973 |  iteration: 11258 teacher: 1 stage: sketch lr: 0.000417
batch 71 loss: 1.43464 acc: 0.69792 | v_loss: 1.40626 v_acc: 0.70150 |  iteration: 11259 teacher: 1 stage: sketch lr: 0.000417
batch 72 loss: 1.52186 acc: 0.69108 | v_loss: 1.36535 v_acc: 0.71029 |  iteration: 11260 teacher: 0 stage: sket

batch 130 loss: 1.47609 acc: 0.69954 | v_loss: 1.37497 v_acc: 0.70182 |  iteration: 11318 teacher: 0 stage: sketch lr: 0.000415
batch 131 loss: 1.42036 acc: 0.70443 | v_loss: 1.32043 v_acc: 0.70638 |  iteration: 11319 teacher: 0 stage: sketch lr: 0.000415
batch 132 loss: 1.54374 acc: 0.68620 | v_loss: 1.32371 v_acc: 0.70410 |  iteration: 11320 teacher: 1 stage: sketch lr: 0.000415
batch 133 loss: 1.44436 acc: 0.70182 | v_loss: 1.33058 v_acc: 0.71842 |  iteration: 11321 teacher: 0 stage: sketch lr: 0.000415
batch 134 loss: 1.47390 acc: 0.69987 | v_loss: 1.56013 v_acc: 0.69108 |  iteration: 11322 teacher: 1 stage: sketch lr: 0.000415
batch 135 loss: 1.46384 acc: 0.70052 | v_loss: 1.38218 v_acc: 0.71126 |  iteration: 11323 teacher: 1 stage: sketch lr: 0.000415
batch 136 loss: 1.40962 acc: 0.69889 | v_loss: 1.37227 v_acc: 0.71061 |  iteration: 11324 teacher: 0 stage: sketch lr: 0.000415
batch 137 loss: 1.47041 acc: 0.70150 | v_loss: 1.38545 v_acc: 0.71452 |  iteration: 11325 teacher: 1 sta

batch 195 loss: 1.36401 acc: 0.70768 | v_loss: 1.26638 v_acc: 0.72461 |  iteration: 11383 teacher: 1 stage: sketch lr: 0.000414
batch 196 loss: 1.41062 acc: 0.70443 | v_loss: 1.34557 v_acc: 0.72689 |  iteration: 11384 teacher: 1 stage: sketch lr: 0.000414
batch 197 loss: 1.46407 acc: 0.69694 | v_loss: 1.26996 v_acc: 0.72852 |  iteration: 11385 teacher: 1 stage: sketch lr: 0.000414
batch 198 loss: 1.53315 acc: 0.69043 | v_loss: 1.31166 v_acc: 0.72103 |  iteration: 11386 teacher: 0 stage: sketch lr: 0.000414
batch 199 loss: 1.52102 acc: 0.69596 | v_loss: 1.41773 v_acc: 0.71354 |  iteration: 11387 teacher: 0 stage: sketch lr: 0.000414
batch 200 loss: 1.41475 acc: 0.70150 | v_loss: 1.39483 v_acc: 0.72721 |  iteration: 11388 teacher: 1 stage: sketch lr: 0.000414
batch 201 loss: 1.48544 acc: 0.70052 | v_loss: 1.48765 v_acc: 0.70150 |  iteration: 11389 teacher: 0 stage: sketch lr: 0.000414
batch 202 loss: 1.43789 acc: 0.69889 | v_loss: 1.41697 v_acc: 0.72135 |  iteration: 11390 teacher: 0 sta

batch 260 loss: 1.54672 acc: 0.69954 | v_loss: 1.46137 v_acc: 0.69368 |  iteration: 11448 teacher: 1 stage: sketch lr: 0.000413
batch 261 loss: 1.41784 acc: 0.70703 | v_loss: 1.34055 v_acc: 0.71452 |  iteration: 11449 teacher: 1 stage: sketch lr: 0.000413
batch 262 loss: 1.62830 acc: 0.68034 | v_loss: 1.30419 v_acc: 0.71777 |  iteration: 11450 teacher: 0 stage: sketch lr: 0.000413
batch 263 loss: 1.51566 acc: 0.69303 | v_loss: 1.29274 v_acc: 0.71875 |  iteration: 11451 teacher: 0 stage: sketch lr: 0.000413
batch 264 loss: 1.42275 acc: 0.69889 | v_loss: 1.43666 v_acc: 0.70475 |  iteration: 11452 teacher: 0 stage: sketch lr: 0.000413
batch 265 loss: 1.41950 acc: 0.70085 | v_loss: 1.31955 v_acc: 0.73014 |  iteration: 11453 teacher: 0 stage: sketch lr: 0.000413
batch 266 loss: 1.40396 acc: 0.70020 | v_loss: 1.53381 v_acc: 0.71322 |  iteration: 11454 teacher: 1 stage: sketch lr: 0.000413
batch 267 loss: 1.40744 acc: 0.71354 | v_loss: 1.28586 v_acc: 0.69531 |  iteration: 11455 teacher: 1 sta

batch 325 loss: 1.40915 acc: 0.70345 | v_loss: 1.39503 v_acc: 0.71908 |  iteration: 11513 teacher: 1 stage: sketch lr: 0.000412
batch 326 loss: 1.35653 acc: 0.70508 | v_loss: 1.42490 v_acc: 0.70150 |  iteration: 11514 teacher: 0 stage: sketch lr: 0.000412
batch 327 loss: 1.44531 acc: 0.71029 | v_loss: 1.42643 v_acc: 0.70833 |  iteration: 11515 teacher: 1 stage: sketch lr: 0.000412
batch 328 loss: 1.37209 acc: 0.71126 | v_loss: 1.22173 v_acc: 0.72331 |  iteration: 11516 teacher: 0 stage: sketch lr: 0.000412
batch 329 loss: 1.34336 acc: 0.70898 | v_loss: 1.38712 v_acc: 0.73145 |  iteration: 11517 teacher: 1 stage: sketch lr: 0.000412
batch 330 loss: 1.36259 acc: 0.70671 | v_loss: 1.48006 v_acc: 0.69727 |  iteration: 11518 teacher: 0 stage: sketch lr: 0.000412
batch 331 loss: 1.53455 acc: 0.68913 | v_loss: 1.42599 v_acc: 0.72363 |  iteration: 11519 teacher: 1 stage: sketch lr: 0.000412
batch 332 loss: 1.43503 acc: 0.69857 | v_loss: 1.24421 v_acc: 0.71842 |  iteration: 11520 teacher: 1 sta

batch 390 loss: 1.48421 acc: 0.70410 | v_loss: 1.37799 v_acc: 0.69336 |  iteration: 11578 teacher: 0 stage: sketch lr: 0.000411
batch 391 loss: 1.50217 acc: 0.70443 | v_loss: 1.30062 v_acc: 0.71191 |  iteration: 11579 teacher: 0 stage: sketch lr: 0.000411
batch 392 loss: 1.46296 acc: 0.70378 | v_loss: 1.36680 v_acc: 0.69629 |  iteration: 11580 teacher: 1 stage: sketch lr: 0.000411
batch 393 loss: 1.46894 acc: 0.70801 | v_loss: 1.47040 v_acc: 0.71517 |  iteration: 11581 teacher: 1 stage: sketch lr: 0.000411
batch 394 loss: 1.45969 acc: 0.69727 | v_loss: 1.31394 v_acc: 0.72363 |  iteration: 11582 teacher: 1 stage: sketch lr: 0.000411
batch 395 loss: 1.53678 acc: 0.69987 | v_loss: 1.45922 v_acc: 0.70117 |  iteration: 11583 teacher: 1 stage: sketch lr: 0.000411
batch 396 loss: 1.47880 acc: 0.70768 | v_loss: 1.35148 v_acc: 0.70085 |  iteration: 11584 teacher: 0 stage: sketch lr: 0.000411
batch 397 loss: 1.45771 acc: 0.71094 | v_loss: 1.34688 v_acc: 0.70703 |  iteration: 11585 teacher: 1 sta

batch 455 loss: 1.43000 acc: 0.70280 | v_loss: 1.42519 v_acc: 0.70410 |  iteration: 11643 teacher: 1 stage: sketch lr: 0.000410
batch 456 loss: 1.47258 acc: 0.70671 | v_loss: 1.40204 v_acc: 0.70768 |  iteration: 11644 teacher: 0 stage: sketch lr: 0.000410
batch 457 loss: 1.55091 acc: 0.68717 | v_loss: 1.40992 v_acc: 0.70475 |  iteration: 11645 teacher: 1 stage: sketch lr: 0.000410
batch 458 loss: 1.40235 acc: 0.71126 | v_loss: 1.26932 v_acc: 0.71354 |  iteration: 11646 teacher: 0 stage: sketch lr: 0.000410
batch 459 loss: 1.45637 acc: 0.69466 | v_loss: 1.32769 v_acc: 0.72461 |  iteration: 11647 teacher: 0 stage: sketch lr: 0.000410
batch 460 loss: 1.47292 acc: 0.69727 | v_loss: 1.18418 v_acc: 0.71517 |  iteration: 11648 teacher: 1 stage: sketch lr: 0.000409
batch 461 loss: 1.45860 acc: 0.70410 | v_loss: 1.34126 v_acc: 0.71126 |  iteration: 11649 teacher: 1 stage: sketch lr: 0.000409
batch 462 loss: 1.40273 acc: 0.71452 | v_loss: 1.51439 v_acc: 0.69954 |  iteration: 11650 teacher: 0 sta

batch 520 loss: 1.43435 acc: 0.70410 | v_loss: 1.49062 v_acc: 0.70410 |  iteration: 11708 teacher: 1 stage: sketch lr: 0.000408
batch 521 loss: 1.37713 acc: 0.70801 | v_loss: 1.67894 v_acc: 0.69238 |  iteration: 11709 teacher: 0 stage: sketch lr: 0.000408
batch 522 loss: 1.41698 acc: 0.70312 | v_loss: 1.53199 v_acc: 0.69531 |  iteration: 11710 teacher: 0 stage: sketch lr: 0.000408
batch 523 loss: 1.48785 acc: 0.68652 | v_loss: 1.29797 v_acc: 0.72168 |  iteration: 11711 teacher: 1 stage: sketch lr: 0.000408
batch 524 loss: 1.39281 acc: 0.70833 | v_loss: 1.37015 v_acc: 0.70866 |  iteration: 11712 teacher: 0 stage: sketch lr: 0.000408
batch 525 loss: 1.42285 acc: 0.70540 | v_loss: 1.21675 v_acc: 0.72103 |  iteration: 11713 teacher: 0 stage: sketch lr: 0.000408
batch 526 loss: 1.35914 acc: 0.71419 | v_loss: 1.43000 v_acc: 0.70020 |  iteration: 11714 teacher: 0 stage: sketch lr: 0.000408
batch 527 loss: 1.45400 acc: 0.70215 | v_loss: 1.35823 v_acc: 0.71094 |  iteration: 11715 teacher: 0 sta

batch 585 loss: 1.56968 acc: 0.70020 | v_loss: 1.38473 v_acc: 0.69759 |  iteration: 11773 teacher: 0 stage: sketch lr: 0.000407
batch 586 loss: 1.40047 acc: 0.69987 | v_loss: 1.32582 v_acc: 0.70215 |  iteration: 11774 teacher: 1 stage: sketch lr: 0.000407
batch 587 loss: 1.49038 acc: 0.70280 | v_loss: 1.34062 v_acc: 0.69889 |  iteration: 11775 teacher: 0 stage: sketch lr: 0.000407
batch 588 loss: 1.36943 acc: 0.70768 | v_loss: 1.33242 v_acc: 0.71419 |  iteration: 11776 teacher: 0 stage: sketch lr: 0.000407
batch 589 loss: 1.45170 acc: 0.70150 | v_loss: 1.51854 v_acc: 0.69401 |  iteration: 11777 teacher: 1 stage: sketch lr: 0.000407
batch 590 loss: 1.43261 acc: 0.70215 | v_loss: 1.37868 v_acc: 0.69922 |  iteration: 11778 teacher: 1 stage: sketch lr: 0.000407
batch 591 loss: 1.54151 acc: 0.69173 | v_loss: 1.35645 v_acc: 0.71126 |  iteration: 11779 teacher: 1 stage: sketch lr: 0.000407
batch 592 loss: 1.47897 acc: 0.70378 | v_loss: 1.38784 v_acc: 0.71810 |  iteration: 11780 teacher: 0 sta

batch 650 loss: 1.45830 acc: 0.69694 | v_loss: 1.27599 v_acc: 0.71647 |  iteration: 11838 teacher: 1 stage: sketch lr: 0.000406
batch 651 loss: 1.50520 acc: 0.69401 | v_loss: 1.33426 v_acc: 0.74089 |  iteration: 11839 teacher: 0 stage: sketch lr: 0.000406
batch 652 loss: 1.41440 acc: 0.70345 | v_loss: 1.26851 v_acc: 0.72005 |  iteration: 11840 teacher: 0 stage: sketch lr: 0.000406
batch 653 loss: 1.41866 acc: 0.69889 | v_loss: 1.31289 v_acc: 0.71582 |  iteration: 11841 teacher: 1 stage: sketch lr: 0.000406
batch 654 loss: 1.40472 acc: 0.70247 | v_loss: 1.41705 v_acc: 0.71224 |  iteration: 11842 teacher: 1 stage: sketch lr: 0.000406
batch 655 loss: 1.46991 acc: 0.70247 | v_loss: 1.39767 v_acc: 0.72070 |  iteration: 11843 teacher: 1 stage: sketch lr: 0.000406
batch 656 loss: 1.36030 acc: 0.71029 | v_loss: 1.48884 v_acc: 0.69954 |  iteration: 11844 teacher: 1 stage: sketch lr: 0.000406
batch 657 loss: 1.44928 acc: 0.70085 | v_loss: 1.41619 v_acc: 0.71615 |  iteration: 11845 teacher: 1 sta

batch 715 loss: 1.45374 acc: 0.70345 | v_loss: 1.46856 v_acc: 0.69368 |  iteration: 11903 teacher: 0 stage: sketch lr: 0.000405
batch 716 loss: 1.48373 acc: 0.69694 | v_loss: 1.33082 v_acc: 0.71159 |  iteration: 11904 teacher: 1 stage: sketch lr: 0.000405
batch 717 loss: 1.42307 acc: 0.70215 | v_loss: 1.28940 v_acc: 0.71549 |  iteration: 11905 teacher: 1 stage: sketch lr: 0.000405
batch 718 loss: 1.40318 acc: 0.69661 | v_loss: 1.28144 v_acc: 0.71973 |  iteration: 11906 teacher: 0 stage: sketch lr: 0.000405
batch 719 loss: 1.51014 acc: 0.70312 | v_loss: 1.42166 v_acc: 0.70573 |  iteration: 11907 teacher: 1 stage: sketch lr: 0.000405
batch 720 loss: 1.42700 acc: 0.69922 | v_loss: 1.31726 v_acc: 0.73112 |  iteration: 11908 teacher: 1 stage: sketch lr: 0.000405
batch 721 loss: 1.58848 acc: 0.68327 | v_loss: 1.54124 v_acc: 0.71452 |  iteration: 11909 teacher: 1 stage: sketch lr: 0.000405
batch 722 loss: 1.46643 acc: 0.70182 | v_loss: 1.29287 v_acc: 0.69759 |  iteration: 11910 teacher: 1 sta

batch 780 loss: 1.40424 acc: 0.71387 | v_loss: 1.37852 v_acc: 0.71647 |  iteration: 11968 teacher: 0 stage: sketch lr: 0.000404
batch 781 loss: 1.45119 acc: 0.70215 | v_loss: 1.43169 v_acc: 0.70052 |  iteration: 11969 teacher: 1 stage: sketch lr: 0.000404
batch 782 loss: 1.44015 acc: 0.70378 | v_loss: 1.42693 v_acc: 0.70312 |  iteration: 11970 teacher: 0 stage: sketch lr: 0.000404
batch 783 loss: 1.53439 acc: 0.69564 | v_loss: 1.23090 v_acc: 0.71419 |  iteration: 11971 teacher: 0 stage: sketch lr: 0.000404
batch 784 loss: 1.55670 acc: 0.70280 | v_loss: 1.39162 v_acc: 0.72786 |  iteration: 11972 teacher: 0 stage: sketch lr: 0.000404
batch 785 loss: 1.52314 acc: 0.68783 | v_loss: 1.47130 v_acc: 0.69792 |  iteration: 11973 teacher: 0 stage: sketch lr: 0.000404
batch 786 loss: 1.51320 acc: 0.69303 | v_loss: 1.40419 v_acc: 0.72070 |  iteration: 11974 teacher: 0 stage: sketch lr: 0.000404
batch 787 loss: 1.49864 acc: 0.70085 | v_loss: 1.25892 v_acc: 0.71810 |  iteration: 11975 teacher: 1 sta

batch 845 loss: 1.64086 acc: 0.68229 | v_loss: 1.36753 v_acc: 0.69824 |  iteration: 12033 teacher: 1 stage: sketch lr: 0.000403
batch 846 loss: 1.32277 acc: 0.70573 | v_loss: 1.30670 v_acc: 0.71257 |  iteration: 12034 teacher: 1 stage: sketch lr: 0.000403
batch 847 loss: 1.37550 acc: 0.70801 | v_loss: 1.36915 v_acc: 0.69629 |  iteration: 12035 teacher: 1 stage: sketch lr: 0.000403
batch 848 loss: 1.48893 acc: 0.70280 | v_loss: 1.50528 v_acc: 0.72005 |  iteration: 12036 teacher: 1 stage: sketch lr: 0.000403
batch 849 loss: 1.47829 acc: 0.69596 | v_loss: 1.32857 v_acc: 0.72786 |  iteration: 12037 teacher: 0 stage: sketch lr: 0.000403
batch 850 loss: 1.49459 acc: 0.70671 | v_loss: 1.45604 v_acc: 0.70443 |  iteration: 12038 teacher: 0 stage: sketch lr: 0.000403
batch 851 loss: 1.45806 acc: 0.69759 | v_loss: 1.36578 v_acc: 0.69922 |  iteration: 12039 teacher: 0 stage: sketch lr: 0.000403
batch 852 loss: 1.49465 acc: 0.69401 | v_loss: 1.32642 v_acc: 0.70898 |  iteration: 12040 teacher: 1 sta

batch 910 loss: 1.44396 acc: 0.70703 | v_loss: 1.42031 v_acc: 0.70540 |  iteration: 12098 teacher: 1 stage: sketch lr: 0.000402
batch 911 loss: 1.55359 acc: 0.69303 | v_loss: 1.40758 v_acc: 0.70540 |  iteration: 12099 teacher: 0 stage: sketch lr: 0.000402
batch 912 loss: 1.34266 acc: 0.71159 | v_loss: 1.40009 v_acc: 0.70605 |  iteration: 12100 teacher: 0 stage: sketch lr: 0.000402
batch 913 loss: 1.44319 acc: 0.70410 | v_loss: 1.28002 v_acc: 0.71126 |  iteration: 12101 teacher: 0 stage: sketch lr: 0.000402
batch 914 loss: 1.41408 acc: 0.70345 | v_loss: 1.31938 v_acc: 0.72331 |  iteration: 12102 teacher: 1 stage: sketch lr: 0.000402
batch 915 loss: 1.31658 acc: 0.70443 | v_loss: 1.20250 v_acc: 0.70671 |  iteration: 12103 teacher: 0 stage: sketch lr: 0.000402
batch 916 loss: 1.39431 acc: 0.70898 | v_loss: 1.34585 v_acc: 0.70150 |  iteration: 12104 teacher: 0 stage: sketch lr: 0.000402
batch 917 loss: 1.41957 acc: 0.70215 | v_loss: 1.50977 v_acc: 0.69987 |  iteration: 12105 teacher: 1 sta

batch 975 loss: 1.48808 acc: 0.69824 | v_loss: 1.43625 v_acc: 0.71647 |  iteration: 12163 teacher: 0 stage: sketch lr: 0.000401
batch 976 loss: 1.47867 acc: 0.70150 | v_loss: 1.67190 v_acc: 0.69303 |  iteration: 12164 teacher: 1 stage: sketch lr: 0.000401
batch 977 loss: 1.47414 acc: 0.69824 | v_loss: 1.52104 v_acc: 0.70117 |  iteration: 12165 teacher: 1 stage: sketch lr: 0.000401
batch 978 loss: 1.41407 acc: 0.69922 | v_loss: 1.29539 v_acc: 0.72363 |  iteration: 12166 teacher: 0 stage: sketch lr: 0.000401
batch 979 loss: 1.48246 acc: 0.69466 | v_loss: 1.37342 v_acc: 0.70410 |  iteration: 12167 teacher: 1 stage: sketch lr: 0.000401
batch 980 loss: 1.46633 acc: 0.70085 | v_loss: 1.21736 v_acc: 0.71973 |  iteration: 12168 teacher: 0 stage: sketch lr: 0.000401
batch 981 loss: 1.35204 acc: 0.70833 | v_loss: 1.42445 v_acc: 0.70150 |  iteration: 12169 teacher: 1 stage: sketch lr: 0.000401
batch 982 loss: 1.39302 acc: 0.70768 | v_loss: 1.36677 v_acc: 0.71029 |  iteration: 12170 teacher: 0 sta

batch 1039 loss: 1.41709 acc: 0.69434 | v_loss: 1.52855 v_acc: 0.68978 |  iteration: 12227 teacher: 1 stage: sketch lr: 0.000400
batch 1040 loss: 1.38189 acc: 0.70833 | v_loss: 1.36902 v_acc: 0.69792 |  iteration: 12228 teacher: 0 stage: sketch lr: 0.000400
batch 1041 loss: 1.46189 acc: 0.69596 | v_loss: 1.33100 v_acc: 0.70378 |  iteration: 12229 teacher: 1 stage: sketch lr: 0.000400
batch 1042 loss: 1.32966 acc: 0.71094 | v_loss: 1.33261 v_acc: 0.70215 |  iteration: 12230 teacher: 0 stage: sketch lr: 0.000400
batch 1043 loss: 1.46982 acc: 0.69694 | v_loss: 1.33304 v_acc: 0.71419 |  iteration: 12231 teacher: 0 stage: sketch lr: 0.000400
batch 1044 loss: 1.46388 acc: 0.70312 | v_loss: 1.52381 v_acc: 0.69173 |  iteration: 12232 teacher: 1 stage: sketch lr: 0.000400
batch 1045 loss: 1.53976 acc: 0.70085 | v_loss: 1.37799 v_acc: 0.70247 |  iteration: 12233 teacher: 0 stage: sketch lr: 0.000400
batch 1046 loss: 1.42954 acc: 0.72038 | v_loss: 1.35591 v_acc: 0.71126 |  iteration: 12234 teache

batch 1103 loss: 1.43115 acc: 0.70280 | v_loss: 1.24568 v_acc: 0.70410 |  iteration: 12291 teacher: 1 stage: sketch lr: 0.000399
batch 1104 loss: 1.37450 acc: 0.70801 | v_loss: 1.23749 v_acc: 0.73828 |  iteration: 12292 teacher: 0 stage: sketch lr: 0.000399
batch 1105 loss: 1.42050 acc: 0.70215 | v_loss: 1.26766 v_acc: 0.72168 |  iteration: 12293 teacher: 0 stage: sketch lr: 0.000399
batch 1106 loss: 1.48019 acc: 0.70410 | v_loss: 1.33792 v_acc: 0.72689 |  iteration: 12294 teacher: 1 stage: sketch lr: 0.000399
batch 1107 loss: 1.46468 acc: 0.69564 | v_loss: 1.25995 v_acc: 0.72852 |  iteration: 12295 teacher: 1 stage: sketch lr: 0.000399
batch 1108 loss: 1.34955 acc: 0.70931 | v_loss: 1.30647 v_acc: 0.72103 |  iteration: 12296 teacher: 1 stage: sketch lr: 0.000399
batch 1109 loss: 1.45536 acc: 0.70540 | v_loss: 1.42473 v_acc: 0.71354 |  iteration: 12297 teacher: 0 stage: sketch lr: 0.000399
batch 1110 loss: 1.44063 acc: 0.71615 | v_loss: 1.40326 v_acc: 0.72070 |  iteration: 12298 teache

batch 1167 loss: 1.42569 acc: 0.70703 | v_loss: 1.36737 v_acc: 0.70345 |  iteration: 12355 teacher: 1 stage: sketch lr: 0.000398
batch 1168 loss: 1.49862 acc: 0.69303 | v_loss: 1.31002 v_acc: 0.72201 |  iteration: 12356 teacher: 1 stage: sketch lr: 0.000398
batch 1169 loss: 1.47782 acc: 0.70085 | v_loss: 1.33110 v_acc: 0.72070 |  iteration: 12357 teacher: 1 stage: sketch lr: 0.000398
batch 1170 loss: 1.41307 acc: 0.69889 | v_loss: 1.53301 v_acc: 0.69043 |  iteration: 12358 teacher: 0 stage: sketch lr: 0.000398
batch 1171 loss: 1.39929 acc: 0.71647 | v_loss: 1.34640 v_acc: 0.70833 |  iteration: 12359 teacher: 1 stage: sketch lr: 0.000398
batch 1172 loss: 1.33321 acc: 0.71061 | v_loss: 1.28907 v_acc: 0.71582 |  iteration: 12360 teacher: 0 stage: sketch lr: 0.000398
batch 1173 loss: 1.47841 acc: 0.69987 | v_loss: 1.28449 v_acc: 0.72201 |  iteration: 12361 teacher: 0 stage: sketch lr: 0.000398
batch 1174 loss: 1.62903 acc: 0.68978 | v_loss: 1.42412 v_acc: 0.70768 |  iteration: 12362 teache

batch 1231 loss: 1.45221 acc: 0.69824 | v_loss: 1.43562 v_acc: 0.69466 |  iteration: 12419 teacher: 0 stage: sketch lr: 0.000397
batch 1232 loss: 1.42850 acc: 0.70117 | v_loss: 1.42862 v_acc: 0.71615 |  iteration: 12420 teacher: 1 stage: sketch lr: 0.000397
batch 1233 loss: 1.42495 acc: 0.70247 | v_loss: 1.28082 v_acc: 0.71712 |  iteration: 12421 teacher: 1 stage: sketch lr: 0.000397
batch 1234 loss: 1.50314 acc: 0.70052 | v_loss: 1.25563 v_acc: 0.72819 |  iteration: 12422 teacher: 0 stage: sketch lr: 0.000397
batch 1235 loss: 1.39889 acc: 0.69629 | v_loss: 1.39032 v_acc: 0.72266 |  iteration: 12423 teacher: 1 stage: sketch lr: 0.000397
batch 1236 loss: 1.42495 acc: 0.70475 | v_loss: 1.41627 v_acc: 0.70573 |  iteration: 12424 teacher: 0 stage: sketch lr: 0.000396
batch 1237 loss: 1.40390 acc: 0.70931 | v_loss: 1.42868 v_acc: 0.70736 |  iteration: 12425 teacher: 0 stage: sketch lr: 0.000396
batch 1238 loss: 1.41401 acc: 0.69629 | v_loss: 1.20916 v_acc: 0.72591 |  iteration: 12426 teache

batch 52 loss: 1.45061 acc: 0.69727 | v_loss: 1.37297 v_acc: 0.71647 |  iteration: 12483 teacher: 0 stage: sketch lr: 0.000396
batch 53 loss: 1.44116 acc: 0.70898 | v_loss: 1.43361 v_acc: 0.70280 |  iteration: 12484 teacher: 1 stage: sketch lr: 0.000396
batch 54 loss: 1.38790 acc: 0.71094 | v_loss: 1.43359 v_acc: 0.70312 |  iteration: 12485 teacher: 1 stage: sketch lr: 0.000396
batch 55 loss: 1.35122 acc: 0.69922 | v_loss: 1.24285 v_acc: 0.71387 |  iteration: 12486 teacher: 0 stage: sketch lr: 0.000396
batch 56 loss: 1.39989 acc: 0.70768 | v_loss: 1.40790 v_acc: 0.72201 |  iteration: 12487 teacher: 1 stage: sketch lr: 0.000395
batch 57 loss: 1.46591 acc: 0.69694 | v_loss: 1.48161 v_acc: 0.69792 |  iteration: 12488 teacher: 0 stage: sketch lr: 0.000395
batch 58 loss: 1.47421 acc: 0.69596 | v_loss: 1.41950 v_acc: 0.72201 |  iteration: 12489 teacher: 1 stage: sketch lr: 0.000395
batch 59 loss: 1.41180 acc: 0.70768 | v_loss: 1.24224 v_acc: 0.72233 |  iteration: 12490 teacher: 0 stage: sket

batch 117 loss: 1.42788 acc: 0.70508 | v_loss: 1.36620 v_acc: 0.69368 |  iteration: 12548 teacher: 1 stage: sketch lr: 0.000395
batch 118 loss: 1.40716 acc: 0.70964 | v_loss: 1.29565 v_acc: 0.71647 |  iteration: 12549 teacher: 0 stage: sketch lr: 0.000395
batch 119 loss: 1.41006 acc: 0.69987 | v_loss: 1.36896 v_acc: 0.69629 |  iteration: 12550 teacher: 1 stage: sketch lr: 0.000394
batch 120 loss: 1.39867 acc: 0.71615 | v_loss: 1.50570 v_acc: 0.70638 |  iteration: 12551 teacher: 0 stage: sketch lr: 0.000394
batch 121 loss: 1.47298 acc: 0.69824 | v_loss: 1.33067 v_acc: 0.72363 |  iteration: 12552 teacher: 0 stage: sketch lr: 0.000394
batch 122 loss: 1.43977 acc: 0.69792 | v_loss: 1.45722 v_acc: 0.70508 |  iteration: 12553 teacher: 1 stage: sketch lr: 0.000394
batch 123 loss: 1.56519 acc: 0.69206 | v_loss: 1.36052 v_acc: 0.69922 |  iteration: 12554 teacher: 1 stage: sketch lr: 0.000394
batch 124 loss: 1.44125 acc: 0.70182 | v_loss: 1.32248 v_acc: 0.70898 |  iteration: 12555 teacher: 0 sta

batch 182 loss: 1.36212 acc: 0.70703 | v_loss: 1.41117 v_acc: 0.70573 |  iteration: 12613 teacher: 1 stage: sketch lr: 0.000394
batch 183 loss: 1.46962 acc: 0.69043 | v_loss: 1.40569 v_acc: 0.70573 |  iteration: 12614 teacher: 1 stage: sketch lr: 0.000393
batch 184 loss: 1.44203 acc: 0.70410 | v_loss: 1.40960 v_acc: 0.70573 |  iteration: 12615 teacher: 0 stage: sketch lr: 0.000393
batch 185 loss: 1.48534 acc: 0.70182 | v_loss: 1.26890 v_acc: 0.71126 |  iteration: 12616 teacher: 1 stage: sketch lr: 0.000393
batch 186 loss: 1.47994 acc: 0.70801 | v_loss: 1.34080 v_acc: 0.72331 |  iteration: 12617 teacher: 1 stage: sketch lr: 0.000393
batch 187 loss: 1.42752 acc: 0.70508 | v_loss: 1.20752 v_acc: 0.70573 |  iteration: 12618 teacher: 0 stage: sketch lr: 0.000393
batch 188 loss: 1.44351 acc: 0.69238 | v_loss: 1.35723 v_acc: 0.70150 |  iteration: 12619 teacher: 0 stage: sketch lr: 0.000393
batch 189 loss: 1.32082 acc: 0.70801 | v_loss: 1.51078 v_acc: 0.69987 |  iteration: 12620 teacher: 0 sta

batch 247 loss: 1.49030 acc: 0.69727 | v_loss: 1.44961 v_acc: 0.71680 |  iteration: 12678 teacher: 1 stage: sketch lr: 0.000392
batch 248 loss: 1.41833 acc: 0.70540 | v_loss: 1.65692 v_acc: 0.69434 |  iteration: 12679 teacher: 0 stage: sketch lr: 0.000392
batch 249 loss: 1.39283 acc: 0.71224 | v_loss: 1.52667 v_acc: 0.70052 |  iteration: 12680 teacher: 0 stage: sketch lr: 0.000392
batch 250 loss: 1.37046 acc: 0.70215 | v_loss: 1.29502 v_acc: 0.72331 |  iteration: 12681 teacher: 0 stage: sketch lr: 0.000392
batch 251 loss: 1.35613 acc: 0.70964 | v_loss: 1.38992 v_acc: 0.70052 |  iteration: 12682 teacher: 1 stage: sketch lr: 0.000392
batch 252 loss: 1.44286 acc: 0.70052 | v_loss: 1.22650 v_acc: 0.72038 |  iteration: 12683 teacher: 0 stage: sketch lr: 0.000392
batch 253 loss: 1.50193 acc: 0.69694 | v_loss: 1.44935 v_acc: 0.70150 |  iteration: 12684 teacher: 1 stage: sketch lr: 0.000392
batch 254 loss: 1.33321 acc: 0.71517 | v_loss: 1.36651 v_acc: 0.71029 |  iteration: 12685 teacher: 1 sta

batch 312 loss: 1.41033 acc: 0.70215 | v_loss: 1.37055 v_acc: 0.70182 |  iteration: 12743 teacher: 1 stage: sketch lr: 0.000391
batch 313 loss: 1.57213 acc: 0.68978 | v_loss: 1.31899 v_acc: 0.70638 |  iteration: 12744 teacher: 1 stage: sketch lr: 0.000391
batch 314 loss: 1.43068 acc: 0.70085 | v_loss: 1.33516 v_acc: 0.70020 |  iteration: 12745 teacher: 0 stage: sketch lr: 0.000391
batch 315 loss: 1.42980 acc: 0.70215 | v_loss: 1.32972 v_acc: 0.71484 |  iteration: 12746 teacher: 1 stage: sketch lr: 0.000391
batch 316 loss: 1.42731 acc: 0.70345 | v_loss: 1.53442 v_acc: 0.68978 |  iteration: 12747 teacher: 1 stage: sketch lr: 0.000391
batch 317 loss: 1.46592 acc: 0.69792 | v_loss: 1.37969 v_acc: 0.70443 |  iteration: 12748 teacher: 1 stage: sketch lr: 0.000391
batch 318 loss: 1.43868 acc: 0.70931 | v_loss: 1.35062 v_acc: 0.70898 |  iteration: 12749 teacher: 0 stage: sketch lr: 0.000391
batch 319 loss: 1.45329 acc: 0.69954 | v_loss: 1.39323 v_acc: 0.71810 |  iteration: 12750 teacher: 1 sta

batch 377 loss: 1.40666 acc: 0.70833 | v_loss: 1.26298 v_acc: 0.72168 |  iteration: 12808 teacher: 1 stage: sketch lr: 0.000391
batch 378 loss: 1.39353 acc: 0.70801 | v_loss: 1.33805 v_acc: 0.73145 |  iteration: 12809 teacher: 0 stage: sketch lr: 0.000390
batch 379 loss: 1.35203 acc: 0.70215 | v_loss: 1.25944 v_acc: 0.72786 |  iteration: 12810 teacher: 0 stage: sketch lr: 0.000390
batch 380 loss: 1.48242 acc: 0.70410 | v_loss: 1.30702 v_acc: 0.72103 |  iteration: 12811 teacher: 1 stage: sketch lr: 0.000390
batch 381 loss: 1.35691 acc: 0.70638 | v_loss: 1.41741 v_acc: 0.71354 |  iteration: 12812 teacher: 1 stage: sketch lr: 0.000390
batch 382 loss: 1.43384 acc: 0.70671 | v_loss: 1.39667 v_acc: 0.72070 |  iteration: 12813 teacher: 0 stage: sketch lr: 0.000390
batch 383 loss: 1.43572 acc: 0.70540 | v_loss: 1.49235 v_acc: 0.69922 |  iteration: 12814 teacher: 0 stage: sketch lr: 0.000390
batch 384 loss: 1.39620 acc: 0.70345 | v_loss: 1.41198 v_acc: 0.71973 |  iteration: 12815 teacher: 0 sta

batch 442 loss: 1.51581 acc: 0.69271 | v_loss: 1.48173 v_acc: 0.69206 |  iteration: 12873 teacher: 1 stage: sketch lr: 0.000390
batch 443 loss: 1.52310 acc: 0.70475 | v_loss: 1.33251 v_acc: 0.70801 |  iteration: 12874 teacher: 0 stage: sketch lr: 0.000390
batch 444 loss: 1.48858 acc: 0.69792 | v_loss: 1.29070 v_acc: 0.71777 |  iteration: 12875 teacher: 0 stage: sketch lr: 0.000389
batch 445 loss: 1.53896 acc: 0.69368 | v_loss: 1.28960 v_acc: 0.71875 |  iteration: 12876 teacher: 0 stage: sketch lr: 0.000389
batch 446 loss: 1.43156 acc: 0.71452 | v_loss: 1.42643 v_acc: 0.70573 |  iteration: 12877 teacher: 0 stage: sketch lr: 0.000389
batch 447 loss: 1.47299 acc: 0.70345 | v_loss: 1.30435 v_acc: 0.73242 |  iteration: 12878 teacher: 1 stage: sketch lr: 0.000389
batch 448 loss: 1.45663 acc: 0.69857 | v_loss: 1.53557 v_acc: 0.71484 |  iteration: 12879 teacher: 0 stage: sketch lr: 0.000389
batch 449 loss: 1.37472 acc: 0.70605 | v_loss: 1.29457 v_acc: 0.69759 |  iteration: 12880 teacher: 1 sta

batch 507 loss: 1.47224 acc: 0.70150 | v_loss: 1.39624 v_acc: 0.72266 |  iteration: 12938 teacher: 1 stage: sketch lr: 0.000389
batch 508 loss: 1.54401 acc: 0.69889 | v_loss: 1.42604 v_acc: 0.70410 |  iteration: 12939 teacher: 0 stage: sketch lr: 0.000389
batch 509 loss: 1.36847 acc: 0.71029 | v_loss: 1.42061 v_acc: 0.70410 |  iteration: 12940 teacher: 0 stage: sketch lr: 0.000389
batch 510 loss: 1.45852 acc: 0.69694 | v_loss: 1.24054 v_acc: 0.71419 |  iteration: 12941 teacher: 1 stage: sketch lr: 0.000388
batch 511 loss: 1.50059 acc: 0.70020 | v_loss: 1.38358 v_acc: 0.72819 |  iteration: 12942 teacher: 1 stage: sketch lr: 0.000388
batch 512 loss: 1.44261 acc: 0.70768 | v_loss: 1.45842 v_acc: 0.70085 |  iteration: 12943 teacher: 1 stage: sketch lr: 0.000388
batch 513 loss: 1.32975 acc: 0.71745 | v_loss: 1.38326 v_acc: 0.71908 |  iteration: 12944 teacher: 1 stage: sketch lr: 0.000388
batch 514 loss: 1.47254 acc: 0.69596 | v_loss: 1.27207 v_acc: 0.71615 |  iteration: 12945 teacher: 1 sta

batch 572 loss: 1.48346 acc: 0.69857 | v_loss: 1.36211 v_acc: 0.69596 |  iteration: 13003 teacher: 0 stage: sketch lr: 0.000388
batch 573 loss: 1.51434 acc: 0.69368 | v_loss: 1.31989 v_acc: 0.70833 |  iteration: 13004 teacher: 1 stage: sketch lr: 0.000388
batch 574 loss: 1.50244 acc: 0.70605 | v_loss: 1.37673 v_acc: 0.69076 |  iteration: 13005 teacher: 0 stage: sketch lr: 0.000388
batch 575 loss: 1.36370 acc: 0.72005 | v_loss: 1.46935 v_acc: 0.70671 |  iteration: 13006 teacher: 1 stage: sketch lr: 0.000388
batch 576 loss: 1.44304 acc: 0.69759 | v_loss: 1.31331 v_acc: 0.72070 |  iteration: 13007 teacher: 0 stage: sketch lr: 0.000388
batch 577 loss: 1.51816 acc: 0.69368 | v_loss: 1.45410 v_acc: 0.70540 |  iteration: 13008 teacher: 0 stage: sketch lr: 0.000387
batch 578 loss: 1.39840 acc: 0.71322 | v_loss: 1.36983 v_acc: 0.70052 |  iteration: 13009 teacher: 1 stage: sketch lr: 0.000387
batch 579 loss: 1.44031 acc: 0.70768 | v_loss: 1.33729 v_acc: 0.70801 |  iteration: 13010 teacher: 1 sta

batch 637 loss: 1.39501 acc: 0.70378 | v_loss: 1.42981 v_acc: 0.70410 |  iteration: 13068 teacher: 1 stage: sketch lr: 0.000387
batch 638 loss: 1.39075 acc: 0.70736 | v_loss: 1.41027 v_acc: 0.70768 |  iteration: 13069 teacher: 1 stage: sketch lr: 0.000387
batch 639 loss: 1.41083 acc: 0.70540 | v_loss: 1.40744 v_acc: 0.70703 |  iteration: 13070 teacher: 1 stage: sketch lr: 0.000387
batch 640 loss: 1.46164 acc: 0.71354 | v_loss: 1.26318 v_acc: 0.71419 |  iteration: 13071 teacher: 1 stage: sketch lr: 0.000387
batch 641 loss: 1.50955 acc: 0.69824 | v_loss: 1.32154 v_acc: 0.72461 |  iteration: 13072 teacher: 0 stage: sketch lr: 0.000387
batch 642 loss: 1.43700 acc: 0.70638 | v_loss: 1.18574 v_acc: 0.71517 |  iteration: 13073 teacher: 0 stage: sketch lr: 0.000387
batch 643 loss: 1.46524 acc: 0.70508 | v_loss: 1.34400 v_acc: 0.70703 |  iteration: 13074 teacher: 0 stage: sketch lr: 0.000387
batch 644 loss: 1.46098 acc: 0.69857 | v_loss: 1.49835 v_acc: 0.69987 |  iteration: 13075 teacher: 1 sta

batch 702 loss: 1.42085 acc: 0.69629 | v_loss: 1.43882 v_acc: 0.72949 |  iteration: 13133 teacher: 1 stage: sketch lr: 0.000386
batch 703 loss: 1.40078 acc: 0.71061 | v_loss: 1.63931 v_acc: 0.69466 |  iteration: 13134 teacher: 1 stage: sketch lr: 0.000386
batch 704 loss: 1.41644 acc: 0.70931 | v_loss: 1.51389 v_acc: 0.70378 |  iteration: 13135 teacher: 1 stage: sketch lr: 0.000386
batch 705 loss: 1.34889 acc: 0.71191 | v_loss: 1.29461 v_acc: 0.72103 |  iteration: 13136 teacher: 0 stage: sketch lr: 0.000386
batch 706 loss: 1.44140 acc: 0.69857 | v_loss: 1.37649 v_acc: 0.70410 |  iteration: 13137 teacher: 1 stage: sketch lr: 0.000386
batch 707 loss: 1.36899 acc: 0.71159 | v_loss: 1.22067 v_acc: 0.71973 |  iteration: 13138 teacher: 0 stage: sketch lr: 0.000386
batch 708 loss: 1.40166 acc: 0.70378 | v_loss: 1.42095 v_acc: 0.70312 |  iteration: 13139 teacher: 1 stage: sketch lr: 0.000386
batch 709 loss: 1.38852 acc: 0.70345 | v_loss: 1.35351 v_acc: 0.71810 |  iteration: 13140 teacher: 1 sta

batch 767 loss: 1.49485 acc: 0.69987 | v_loss: 1.37532 v_acc: 0.70085 |  iteration: 13198 teacher: 1 stage: sketch lr: 0.000385
batch 768 loss: 1.53066 acc: 0.69694 | v_loss: 1.31917 v_acc: 0.70312 |  iteration: 13199 teacher: 0 stage: sketch lr: 0.000385
batch 769 loss: 1.46457 acc: 0.69271 | v_loss: 1.33062 v_acc: 0.70020 |  iteration: 13200 teacher: 1 stage: sketch lr: 0.000385
batch 770 loss: 1.49316 acc: 0.70540 | v_loss: 1.32745 v_acc: 0.71484 |  iteration: 13201 teacher: 0 stage: sketch lr: 0.000385
batch 771 loss: 1.39469 acc: 0.70345 | v_loss: 1.54012 v_acc: 0.68978 |  iteration: 13202 teacher: 0 stage: sketch lr: 0.000385
batch 772 loss: 1.40228 acc: 0.70573 | v_loss: 1.38563 v_acc: 0.70443 |  iteration: 13203 teacher: 1 stage: sketch lr: 0.000385
batch 773 loss: 1.39735 acc: 0.69792 | v_loss: 1.33755 v_acc: 0.71029 |  iteration: 13204 teacher: 0 stage: sketch lr: 0.000385
batch 774 loss: 1.46777 acc: 0.69401 | v_loss: 1.38317 v_acc: 0.71680 |  iteration: 13205 teacher: 0 sta

batch 832 loss: 1.42265 acc: 0.70703 | v_loss: 1.27342 v_acc: 0.72168 |  iteration: 13263 teacher: 1 stage: sketch lr: 0.000384
batch 833 loss: 1.39159 acc: 0.70540 | v_loss: 1.33486 v_acc: 0.73145 |  iteration: 13264 teacher: 0 stage: sketch lr: 0.000384
batch 834 loss: 1.47374 acc: 0.70573 | v_loss: 1.25937 v_acc: 0.71973 |  iteration: 13265 teacher: 0 stage: sketch lr: 0.000384
batch 835 loss: 1.57956 acc: 0.69141 | v_loss: 1.29805 v_acc: 0.71582 |  iteration: 13266 teacher: 1 stage: sketch lr: 0.000384
batch 836 loss: 1.47318 acc: 0.69759 | v_loss: 1.40946 v_acc: 0.71322 |  iteration: 13267 teacher: 1 stage: sketch lr: 0.000384
batch 837 loss: 1.37185 acc: 0.70052 | v_loss: 1.37619 v_acc: 0.72103 |  iteration: 13268 teacher: 1 stage: sketch lr: 0.000384
batch 838 loss: 1.26621 acc: 0.71322 | v_loss: 1.48006 v_acc: 0.69922 |  iteration: 13269 teacher: 1 stage: sketch lr: 0.000384
batch 839 loss: 1.36473 acc: 0.70964 | v_loss: 1.41833 v_acc: 0.71908 |  iteration: 13270 teacher: 1 sta

batch 897 loss: 1.55415 acc: 0.68522 | v_loss: 1.50277 v_acc: 0.69206 |  iteration: 13328 teacher: 1 stage: sketch lr: 0.000383
batch 898 loss: 1.47913 acc: 0.68848 | v_loss: 1.33102 v_acc: 0.71419 |  iteration: 13329 teacher: 0 stage: sketch lr: 0.000383
batch 899 loss: 1.40404 acc: 0.70378 | v_loss: 1.28425 v_acc: 0.71777 |  iteration: 13330 teacher: 1 stage: sketch lr: 0.000383
batch 900 loss: 1.33610 acc: 0.70378 | v_loss: 1.28230 v_acc: 0.71973 |  iteration: 13331 teacher: 1 stage: sketch lr: 0.000383
batch 901 loss: 1.47345 acc: 0.69596 | v_loss: 1.42964 v_acc: 0.70573 |  iteration: 13332 teacher: 1 stage: sketch lr: 0.000383
batch 902 loss: 1.53728 acc: 0.68750 | v_loss: 1.32104 v_acc: 0.72982 |  iteration: 13333 teacher: 1 stage: sketch lr: 0.000383
batch 903 loss: 1.40279 acc: 0.70052 | v_loss: 1.54464 v_acc: 0.71322 |  iteration: 13334 teacher: 0 stage: sketch lr: 0.000383
batch 904 loss: 1.55859 acc: 0.69401 | v_loss: 1.29061 v_acc: 0.69531 |  iteration: 13335 teacher: 0 sta

batch 962 loss: 1.25410 acc: 0.71094 | v_loss: 1.39898 v_acc: 0.72266 |  iteration: 13393 teacher: 1 stage: sketch lr: 0.000382
batch 963 loss: 1.44939 acc: 0.69792 | v_loss: 1.45126 v_acc: 0.70410 |  iteration: 13394 teacher: 1 stage: sketch lr: 0.000382
batch 964 loss: 1.54026 acc: 0.69010 | v_loss: 1.45572 v_acc: 0.70736 |  iteration: 13395 teacher: 1 stage: sketch lr: 0.000382
batch 965 loss: 1.40876 acc: 0.70150 | v_loss: 1.23267 v_acc: 0.71712 |  iteration: 13396 teacher: 1 stage: sketch lr: 0.000382
batch 966 loss: 1.44944 acc: 0.70345 | v_loss: 1.41097 v_acc: 0.72786 |  iteration: 13397 teacher: 0 stage: sketch lr: 0.000382
batch 967 loss: 1.38915 acc: 0.70866 | v_loss: 1.47831 v_acc: 0.69792 |  iteration: 13398 teacher: 0 stage: sketch lr: 0.000382
batch 968 loss: 1.46274 acc: 0.69792 | v_loss: 1.40120 v_acc: 0.71842 |  iteration: 13399 teacher: 0 stage: sketch lr: 0.000382
batch 969 loss: 1.47673 acc: 0.70117 | v_loss: 1.25444 v_acc: 0.71810 |  iteration: 13400 teacher: 1 sta

batch 1026 loss: 1.36228 acc: 0.70833 | v_loss: 1.32671 v_acc: 0.71159 |  iteration: 13457 teacher: 0 stage: sketch lr: 0.000381
batch 1027 loss: 1.52943 acc: 0.69727 | v_loss: 1.35852 v_acc: 0.69336 |  iteration: 13458 teacher: 1 stage: sketch lr: 0.000381
batch 1028 loss: 1.49123 acc: 0.69076 | v_loss: 1.30255 v_acc: 0.71094 |  iteration: 13459 teacher: 0 stage: sketch lr: 0.000381
batch 1029 loss: 1.51296 acc: 0.69206 | v_loss: 1.36588 v_acc: 0.69661 |  iteration: 13460 teacher: 0 stage: sketch lr: 0.000381
batch 1030 loss: 1.51537 acc: 0.69792 | v_loss: 1.45920 v_acc: 0.71322 |  iteration: 13461 teacher: 1 stage: sketch lr: 0.000381
batch 1031 loss: 1.33441 acc: 0.70964 | v_loss: 1.31254 v_acc: 0.72396 |  iteration: 13462 teacher: 0 stage: sketch lr: 0.000381
batch 1032 loss: 1.50716 acc: 0.69596 | v_loss: 1.43672 v_acc: 0.70540 |  iteration: 13463 teacher: 0 stage: sketch lr: 0.000381
batch 1033 loss: 1.33387 acc: 0.71354 | v_loss: 1.35699 v_acc: 0.69954 |  iteration: 13464 teache

batch 1090 loss: 1.54789 acc: 0.69889 | v_loss: 1.50151 v_acc: 0.69010 |  iteration: 13521 teacher: 0 stage: sketch lr: 0.000380
batch 1091 loss: 1.42727 acc: 0.70215 | v_loss: 1.46609 v_acc: 0.70768 |  iteration: 13522 teacher: 0 stage: sketch lr: 0.000380
batch 1092 loss: 1.33751 acc: 0.71354 | v_loss: 1.41746 v_acc: 0.70410 |  iteration: 13523 teacher: 0 stage: sketch lr: 0.000380
batch 1093 loss: 1.44621 acc: 0.70020 | v_loss: 1.40542 v_acc: 0.70931 |  iteration: 13524 teacher: 0 stage: sketch lr: 0.000380
batch 1094 loss: 1.44035 acc: 0.70182 | v_loss: 1.39782 v_acc: 0.70410 |  iteration: 13525 teacher: 1 stage: sketch lr: 0.000380
batch 1095 loss: 1.38791 acc: 0.70573 | v_loss: 1.25339 v_acc: 0.71354 |  iteration: 13526 teacher: 1 stage: sketch lr: 0.000380
batch 1096 loss: 1.36859 acc: 0.71289 | v_loss: 1.33808 v_acc: 0.72135 |  iteration: 13527 teacher: 1 stage: sketch lr: 0.000380
batch 1097 loss: 1.39629 acc: 0.70020 | v_loss: 1.18009 v_acc: 0.70801 |  iteration: 13528 teache

batch 1154 loss: 1.43958 acc: 0.69368 | v_loss: 1.26151 v_acc: 0.71126 |  iteration: 13585 teacher: 0 stage: sketch lr: 0.000379
batch 1155 loss: 1.40612 acc: 0.70410 | v_loss: 1.44444 v_acc: 0.70573 |  iteration: 13586 teacher: 1 stage: sketch lr: 0.000379
batch 1156 loss: 1.40548 acc: 0.70312 | v_loss: 1.27655 v_acc: 0.71387 |  iteration: 13587 teacher: 1 stage: sketch lr: 0.000379
batch 1157 loss: 1.45760 acc: 0.69043 | v_loss: 1.45526 v_acc: 0.71289 |  iteration: 13588 teacher: 1 stage: sketch lr: 0.000379
batch 1158 loss: 1.45478 acc: 0.69727 | v_loss: 1.67378 v_acc: 0.69303 |  iteration: 13589 teacher: 0 stage: sketch lr: 0.000379
batch 1159 loss: 1.45917 acc: 0.69824 | v_loss: 1.52801 v_acc: 0.70117 |  iteration: 13590 teacher: 1 stage: sketch lr: 0.000379
batch 1160 loss: 1.37687 acc: 0.70898 | v_loss: 1.29348 v_acc: 0.72363 |  iteration: 13591 teacher: 0 stage: sketch lr: 0.000379
batch 1161 loss: 1.51926 acc: 0.69336 | v_loss: 1.38609 v_acc: 0.70410 |  iteration: 13592 teache

batch 1218 loss: 1.38767 acc: 0.71745 | v_loss: 1.30400 v_acc: 0.72201 |  iteration: 13649 teacher: 0 stage: sketch lr: 0.000378
batch 1219 loss: 1.51087 acc: 0.70736 | v_loss: 1.56940 v_acc: 0.69043 |  iteration: 13650 teacher: 1 stage: sketch lr: 0.000378
batch 1220 loss: 1.46244 acc: 0.70573 | v_loss: 1.43215 v_acc: 0.70215 |  iteration: 13651 teacher: 1 stage: sketch lr: 0.000378
batch 1221 loss: 1.42012 acc: 0.70475 | v_loss: 1.50672 v_acc: 0.68750 |  iteration: 13652 teacher: 0 stage: sketch lr: 0.000378
batch 1222 loss: 1.53337 acc: 0.69661 | v_loss: 1.38676 v_acc: 0.69824 |  iteration: 13653 teacher: 0 stage: sketch lr: 0.000378
batch 1223 loss: 1.43472 acc: 0.71029 | v_loss: 1.32629 v_acc: 0.70247 |  iteration: 13654 teacher: 0 stage: sketch lr: 0.000378
batch 1224 loss: 1.40655 acc: 0.71094 | v_loss: 1.34089 v_acc: 0.70020 |  iteration: 13655 teacher: 0 stage: sketch lr: 0.000378
batch 1225 loss: 1.43625 acc: 0.69922 | v_loss: 1.33602 v_acc: 0.71680 |  iteration: 13656 teache

batch 39 loss: 1.35566 acc: 0.70866 | v_loss: 1.37948 v_acc: 0.70020 |  iteration: 13713 teacher: 0 stage: sketch lr: 0.000377
batch 40 loss: 1.39114 acc: 0.71289 | v_loss: 1.32710 v_acc: 0.70215 |  iteration: 13714 teacher: 1 stage: sketch lr: 0.000377
batch 41 loss: 1.44508 acc: 0.69271 | v_loss: 1.32873 v_acc: 0.69889 |  iteration: 13715 teacher: 0 stage: sketch lr: 0.000377
batch 42 loss: 1.49543 acc: 0.70280 | v_loss: 1.33265 v_acc: 0.71419 |  iteration: 13716 teacher: 1 stage: sketch lr: 0.000377
batch 43 loss: 1.42982 acc: 0.70801 | v_loss: 1.52456 v_acc: 0.69173 |  iteration: 13717 teacher: 1 stage: sketch lr: 0.000377
batch 44 loss: 1.41854 acc: 0.70801 | v_loss: 1.37671 v_acc: 0.70247 |  iteration: 13718 teacher: 1 stage: sketch lr: 0.000377
batch 45 loss: 1.40860 acc: 0.70768 | v_loss: 1.34979 v_acc: 0.71029 |  iteration: 13719 teacher: 0 stage: sketch lr: 0.000377
batch 46 loss: 1.52921 acc: 0.69987 | v_loss: 1.38531 v_acc: 0.71680 |  iteration: 13720 teacher: 0 stage: sket

batch 104 loss: 1.53103 acc: 0.68913 | v_loss: 1.27862 v_acc: 0.71680 |  iteration: 13778 teacher: 1 stage: sketch lr: 0.000377
batch 105 loss: 1.44266 acc: 0.70475 | v_loss: 1.33396 v_acc: 0.74089 |  iteration: 13779 teacher: 0 stage: sketch lr: 0.000376
batch 106 loss: 1.38900 acc: 0.71322 | v_loss: 1.26146 v_acc: 0.72005 |  iteration: 13780 teacher: 1 stage: sketch lr: 0.000376
batch 107 loss: 1.44544 acc: 0.69759 | v_loss: 1.30113 v_acc: 0.71582 |  iteration: 13781 teacher: 1 stage: sketch lr: 0.000376
batch 108 loss: 1.36280 acc: 0.71029 | v_loss: 1.41438 v_acc: 0.71224 |  iteration: 13782 teacher: 1 stage: sketch lr: 0.000376
batch 109 loss: 1.45791 acc: 0.70703 | v_loss: 1.39061 v_acc: 0.72070 |  iteration: 13783 teacher: 1 stage: sketch lr: 0.000376
batch 110 loss: 1.42091 acc: 0.70508 | v_loss: 1.49297 v_acc: 0.69954 |  iteration: 13784 teacher: 1 stage: sketch lr: 0.000376
batch 111 loss: 1.36235 acc: 0.71191 | v_loss: 1.42251 v_acc: 0.71615 |  iteration: 13785 teacher: 0 sta

batch 169 loss: 1.47770 acc: 0.69759 | v_loss: 1.46339 v_acc: 0.69303 |  iteration: 13843 teacher: 1 stage: sketch lr: 0.000376
batch 170 loss: 1.44358 acc: 0.71224 | v_loss: 1.33587 v_acc: 0.71159 |  iteration: 13844 teacher: 0 stage: sketch lr: 0.000376
batch 171 loss: 1.43606 acc: 0.69792 | v_loss: 1.28560 v_acc: 0.71777 |  iteration: 13845 teacher: 1 stage: sketch lr: 0.000376
batch 172 loss: 1.44175 acc: 0.70801 | v_loss: 1.28339 v_acc: 0.71484 |  iteration: 13846 teacher: 1 stage: sketch lr: 0.000376
batch 173 loss: 1.37844 acc: 0.71322 | v_loss: 1.43569 v_acc: 0.70540 |  iteration: 13847 teacher: 1 stage: sketch lr: 0.000376
batch 174 loss: 1.39416 acc: 0.70801 | v_loss: 1.31260 v_acc: 0.73112 |  iteration: 13848 teacher: 1 stage: sketch lr: 0.000376
batch 175 loss: 1.51208 acc: 0.69954 | v_loss: 1.53787 v_acc: 0.71452 |  iteration: 13849 teacher: 0 stage: sketch lr: 0.000376
batch 176 loss: 1.40782 acc: 0.70475 | v_loss: 1.29868 v_acc: 0.69759 |  iteration: 13850 teacher: 1 sta

batch 234 loss: 1.39869 acc: 0.70117 | v_loss: 1.37552 v_acc: 0.71875 |  iteration: 13908 teacher: 0 stage: sketch lr: 0.000375
batch 235 loss: 1.42202 acc: 0.69759 | v_loss: 1.42108 v_acc: 0.70247 |  iteration: 13909 teacher: 0 stage: sketch lr: 0.000375
batch 236 loss: 1.34433 acc: 0.70768 | v_loss: 1.42847 v_acc: 0.70378 |  iteration: 13910 teacher: 1 stage: sketch lr: 0.000375
batch 237 loss: 1.39458 acc: 0.70898 | v_loss: 1.21981 v_acc: 0.71712 |  iteration: 13911 teacher: 1 stage: sketch lr: 0.000375
batch 238 loss: 1.47497 acc: 0.70508 | v_loss: 1.39201 v_acc: 0.72786 |  iteration: 13912 teacher: 1 stage: sketch lr: 0.000375
batch 239 loss: 1.45257 acc: 0.69564 | v_loss: 1.47429 v_acc: 0.69792 |  iteration: 13913 teacher: 0 stage: sketch lr: 0.000375
batch 240 loss: 1.38156 acc: 0.70898 | v_loss: 1.42426 v_acc: 0.72070 |  iteration: 13914 teacher: 1 stage: sketch lr: 0.000375
batch 241 loss: 1.52714 acc: 0.69108 | v_loss: 1.24805 v_acc: 0.72201 |  iteration: 13915 teacher: 0 sta

batch 299 loss: 1.50942 acc: 0.69108 | v_loss: 1.36205 v_acc: 0.69336 |  iteration: 13973 teacher: 1 stage: sketch lr: 0.000374
batch 300 loss: 1.41179 acc: 0.70540 | v_loss: 1.29765 v_acc: 0.71191 |  iteration: 13974 teacher: 1 stage: sketch lr: 0.000374
batch 301 loss: 1.38436 acc: 0.71191 | v_loss: 1.36735 v_acc: 0.69629 |  iteration: 13975 teacher: 0 stage: sketch lr: 0.000374
batch 302 loss: 1.32603 acc: 0.71224 | v_loss: 1.46957 v_acc: 0.70931 |  iteration: 13976 teacher: 0 stage: sketch lr: 0.000374
batch 303 loss: 1.35598 acc: 0.71875 | v_loss: 1.31704 v_acc: 0.72363 |  iteration: 13977 teacher: 1 stage: sketch lr: 0.000374
batch 304 loss: 1.50623 acc: 0.69434 | v_loss: 1.46585 v_acc: 0.70117 |  iteration: 13978 teacher: 1 stage: sketch lr: 0.000374
batch 305 loss: 1.56010 acc: 0.69824 | v_loss: 1.36169 v_acc: 0.70085 |  iteration: 13979 teacher: 0 stage: sketch lr: 0.000374
batch 306 loss: 1.46535 acc: 0.69694 | v_loss: 1.33656 v_acc: 0.70866 |  iteration: 13980 teacher: 1 sta

batch 364 loss: 1.49010 acc: 0.69792 | v_loss: 1.42247 v_acc: 0.70410 |  iteration: 14038 teacher: 1 stage: sketch lr: 0.000373
batch 365 loss: 1.28400 acc: 0.70736 | v_loss: 1.40697 v_acc: 0.70768 |  iteration: 14039 teacher: 0 stage: sketch lr: 0.000373
batch 366 loss: 1.51505 acc: 0.69889 | v_loss: 1.39981 v_acc: 0.70475 |  iteration: 14040 teacher: 0 stage: sketch lr: 0.000373
batch 367 loss: 1.47684 acc: 0.70215 | v_loss: 1.26208 v_acc: 0.71354 |  iteration: 14041 teacher: 1 stage: sketch lr: 0.000373
batch 368 loss: 1.48971 acc: 0.69238 | v_loss: 1.32280 v_acc: 0.72461 |  iteration: 14042 teacher: 1 stage: sketch lr: 0.000373
batch 369 loss: 1.37957 acc: 0.69987 | v_loss: 1.17612 v_acc: 0.71517 |  iteration: 14043 teacher: 1 stage: sketch lr: 0.000373
batch 370 loss: 1.56912 acc: 0.69401 | v_loss: 1.33817 v_acc: 0.70410 |  iteration: 14044 teacher: 0 stage: sketch lr: 0.000373
batch 371 loss: 1.36743 acc: 0.71126 | v_loss: 1.50025 v_acc: 0.69987 |  iteration: 14045 teacher: 1 sta

batch 429 loss: 1.49281 acc: 0.69727 | v_loss: 1.42271 v_acc: 0.72949 |  iteration: 14103 teacher: 0 stage: sketch lr: 0.000372
batch 430 loss: 1.38221 acc: 0.70540 | v_loss: 1.66940 v_acc: 0.69173 |  iteration: 14104 teacher: 0 stage: sketch lr: 0.000372
batch 431 loss: 1.40662 acc: 0.70215 | v_loss: 1.53009 v_acc: 0.69987 |  iteration: 14105 teacher: 0 stage: sketch lr: 0.000372
batch 432 loss: 1.51187 acc: 0.69303 | v_loss: 1.29340 v_acc: 0.72298 |  iteration: 14106 teacher: 1 stage: sketch lr: 0.000372
batch 433 loss: 1.40229 acc: 0.71354 | v_loss: 1.37689 v_acc: 0.70410 |  iteration: 14107 teacher: 1 stage: sketch lr: 0.000372
batch 434 loss: 1.39516 acc: 0.70508 | v_loss: 1.21326 v_acc: 0.71973 |  iteration: 14108 teacher: 1 stage: sketch lr: 0.000372
batch 435 loss: 1.54364 acc: 0.68783 | v_loss: 1.41952 v_acc: 0.70150 |  iteration: 14109 teacher: 1 stage: sketch lr: 0.000372
batch 436 loss: 1.33491 acc: 0.71159 | v_loss: 1.35887 v_acc: 0.71810 |  iteration: 14110 teacher: 0 sta

batch 494 loss: 1.40621 acc: 0.71191 | v_loss: 1.37071 v_acc: 0.70182 |  iteration: 14168 teacher: 1 stage: sketch lr: 0.000371
batch 495 loss: 1.37396 acc: 0.71159 | v_loss: 1.33124 v_acc: 0.70638 |  iteration: 14169 teacher: 1 stage: sketch lr: 0.000371
batch 496 loss: 1.46227 acc: 0.70345 | v_loss: 1.32875 v_acc: 0.70020 |  iteration: 14170 teacher: 1 stage: sketch lr: 0.000371
batch 497 loss: 1.51657 acc: 0.69824 | v_loss: 1.32749 v_acc: 0.71484 |  iteration: 14171 teacher: 1 stage: sketch lr: 0.000371
batch 498 loss: 1.34272 acc: 0.70703 | v_loss: 1.52263 v_acc: 0.68978 |  iteration: 14172 teacher: 1 stage: sketch lr: 0.000371
batch 499 loss: 1.49835 acc: 0.69531 | v_loss: 1.37780 v_acc: 0.70443 |  iteration: 14173 teacher: 0 stage: sketch lr: 0.000371
batch 500 loss: 1.39522 acc: 0.70410 | v_loss: 1.36895 v_acc: 0.71029 |  iteration: 14174 teacher: 0 stage: sketch lr: 0.000371
batch 501 loss: 1.42117 acc: 0.69922 | v_loss: 1.38166 v_acc: 0.71680 |  iteration: 14175 teacher: 1 sta

batch 559 loss: 1.39657 acc: 0.70866 | v_loss: 1.26950 v_acc: 0.71908 |  iteration: 14233 teacher: 0 stage: sketch lr: 0.000370
batch 560 loss: 1.52447 acc: 0.69596 | v_loss: 1.35825 v_acc: 0.73112 |  iteration: 14234 teacher: 1 stage: sketch lr: 0.000370
batch 561 loss: 1.36546 acc: 0.70866 | v_loss: 1.26829 v_acc: 0.72038 |  iteration: 14235 teacher: 0 stage: sketch lr: 0.000370
batch 562 loss: 1.32994 acc: 0.70605 | v_loss: 1.31311 v_acc: 0.72005 |  iteration: 14236 teacher: 0 stage: sketch lr: 0.000370
batch 563 loss: 1.37940 acc: 0.70671 | v_loss: 1.42498 v_acc: 0.71224 |  iteration: 14237 teacher: 0 stage: sketch lr: 0.000370
batch 564 loss: 1.53814 acc: 0.69759 | v_loss: 1.40737 v_acc: 0.72070 |  iteration: 14238 teacher: 0 stage: sketch lr: 0.000370
batch 565 loss: 1.37729 acc: 0.70898 | v_loss: 1.49029 v_acc: 0.69954 |  iteration: 14239 teacher: 0 stage: sketch lr: 0.000370
batch 566 loss: 1.43713 acc: 0.69596 | v_loss: 1.43098 v_acc: 0.71615 |  iteration: 14240 teacher: 0 sta

batch 624 loss: 1.37786 acc: 0.70605 | v_loss: 1.49242 v_acc: 0.69368 |  iteration: 14298 teacher: 0 stage: sketch lr: 0.000370
batch 625 loss: 1.32999 acc: 0.70931 | v_loss: 1.33688 v_acc: 0.71452 |  iteration: 14299 teacher: 1 stage: sketch lr: 0.000370
batch 626 loss: 1.51099 acc: 0.70345 | v_loss: 1.29081 v_acc: 0.71615 |  iteration: 14300 teacher: 1 stage: sketch lr: 0.000370
batch 627 loss: 1.45938 acc: 0.70085 | v_loss: 1.28936 v_acc: 0.71973 |  iteration: 14301 teacher: 0 stage: sketch lr: 0.000370
batch 628 loss: 1.46659 acc: 0.69694 | v_loss: 1.42698 v_acc: 0.70540 |  iteration: 14302 teacher: 0 stage: sketch lr: 0.000370
batch 629 loss: 1.46134 acc: 0.70410 | v_loss: 1.31046 v_acc: 0.73210 |  iteration: 14303 teacher: 1 stage: sketch lr: 0.000370
batch 630 loss: 1.38553 acc: 0.70410 | v_loss: 1.53994 v_acc: 0.71615 |  iteration: 14304 teacher: 0 stage: sketch lr: 0.000370
batch 631 loss: 1.47664 acc: 0.69954 | v_loss: 1.28564 v_acc: 0.69759 |  iteration: 14305 teacher: 0 sta

batch 689 loss: 1.55095 acc: 0.68880 | v_loss: 1.38802 v_acc: 0.72266 |  iteration: 14363 teacher: 1 stage: sketch lr: 0.000369
batch 690 loss: 1.51274 acc: 0.70378 | v_loss: 1.41546 v_acc: 0.70410 |  iteration: 14364 teacher: 1 stage: sketch lr: 0.000369
batch 691 loss: 1.40128 acc: 0.70703 | v_loss: 1.41809 v_acc: 0.70833 |  iteration: 14365 teacher: 0 stage: sketch lr: 0.000369
batch 692 loss: 1.44037 acc: 0.70280 | v_loss: 1.22014 v_acc: 0.72591 |  iteration: 14366 teacher: 1 stage: sketch lr: 0.000369
batch 693 loss: 1.46081 acc: 0.70052 | v_loss: 1.37671 v_acc: 0.72786 |  iteration: 14367 teacher: 1 stage: sketch lr: 0.000369
batch 694 loss: 1.41902 acc: 0.70345 | v_loss: 1.46296 v_acc: 0.69792 |  iteration: 14368 teacher: 1 stage: sketch lr: 0.000369
batch 695 loss: 1.39714 acc: 0.70215 | v_loss: 1.41496 v_acc: 0.72070 |  iteration: 14369 teacher: 0 stage: sketch lr: 0.000369
batch 696 loss: 1.42539 acc: 0.71126 | v_loss: 1.26227 v_acc: 0.71973 |  iteration: 14370 teacher: 0 sta

batch 754 loss: 1.37814 acc: 0.70736 | v_loss: 1.37153 v_acc: 0.69368 |  iteration: 14428 teacher: 1 stage: sketch lr: 0.000368
batch 755 loss: 1.48643 acc: 0.69824 | v_loss: 1.28889 v_acc: 0.71842 |  iteration: 14429 teacher: 0 stage: sketch lr: 0.000368
batch 756 loss: 1.52801 acc: 0.69141 | v_loss: 1.35708 v_acc: 0.69629 |  iteration: 14430 teacher: 0 stage: sketch lr: 0.000368
batch 757 loss: 1.35877 acc: 0.71354 | v_loss: 1.46805 v_acc: 0.71419 |  iteration: 14431 teacher: 0 stage: sketch lr: 0.000368
batch 758 loss: 1.45315 acc: 0.69629 | v_loss: 1.31319 v_acc: 0.72884 |  iteration: 14432 teacher: 0 stage: sketch lr: 0.000368
batch 759 loss: 1.41569 acc: 0.70996 | v_loss: 1.44952 v_acc: 0.70117 |  iteration: 14433 teacher: 0 stage: sketch lr: 0.000368
batch 760 loss: 1.37829 acc: 0.70150 | v_loss: 1.35938 v_acc: 0.70085 |  iteration: 14434 teacher: 1 stage: sketch lr: 0.000368
batch 761 loss: 1.45081 acc: 0.70182 | v_loss: 1.33181 v_acc: 0.70703 |  iteration: 14435 teacher: 1 sta

batch 819 loss: 1.32923 acc: 0.71224 | v_loss: 1.41649 v_acc: 0.70085 |  iteration: 14493 teacher: 0 stage: sketch lr: 0.000367
batch 820 loss: 1.38109 acc: 0.70833 | v_loss: 1.40564 v_acc: 0.70931 |  iteration: 14494 teacher: 1 stage: sketch lr: 0.000367
batch 821 loss: 1.48981 acc: 0.71126 | v_loss: 1.40488 v_acc: 0.70703 |  iteration: 14495 teacher: 1 stage: sketch lr: 0.000367
batch 822 loss: 1.36371 acc: 0.70312 | v_loss: 1.25739 v_acc: 0.71875 |  iteration: 14496 teacher: 0 stage: sketch lr: 0.000367
batch 823 loss: 1.41590 acc: 0.71126 | v_loss: 1.32113 v_acc: 0.72461 |  iteration: 14497 teacher: 0 stage: sketch lr: 0.000367
batch 824 loss: 1.41363 acc: 0.70540 | v_loss: 1.18710 v_acc: 0.71517 |  iteration: 14498 teacher: 0 stage: sketch lr: 0.000367
batch 825 loss: 1.50635 acc: 0.69922 | v_loss: 1.34862 v_acc: 0.70247 |  iteration: 14499 teacher: 1 stage: sketch lr: 0.000367
batch 826 loss: 1.40451 acc: 0.70410 | v_loss: 1.50720 v_acc: 0.69792 |  iteration: 14500 teacher: 1 sta

batch 884 loss: 1.41544 acc: 0.69824 | v_loss: 1.47732 v_acc: 0.70475 |  iteration: 14558 teacher: 1 stage: sketch lr: 0.000366
batch 885 loss: 1.42923 acc: 0.70247 | v_loss: 1.69641 v_acc: 0.69141 |  iteration: 14559 teacher: 0 stage: sketch lr: 0.000366
batch 886 loss: 1.42733 acc: 0.71029 | v_loss: 1.54919 v_acc: 0.69531 |  iteration: 14560 teacher: 0 stage: sketch lr: 0.000366
batch 887 loss: 1.53998 acc: 0.69531 | v_loss: 1.29088 v_acc: 0.72526 |  iteration: 14561 teacher: 0 stage: sketch lr: 0.000366
batch 888 loss: 1.39953 acc: 0.70247 | v_loss: 1.36963 v_acc: 0.70996 |  iteration: 14562 teacher: 1 stage: sketch lr: 0.000366
batch 889 loss: 1.40417 acc: 0.70085 | v_loss: 1.21551 v_acc: 0.72201 |  iteration: 14563 teacher: 1 stage: sketch lr: 0.000366
batch 890 loss: 1.49029 acc: 0.69368 | v_loss: 1.41887 v_acc: 0.70378 |  iteration: 14564 teacher: 1 stage: sketch lr: 0.000366
batch 891 loss: 1.55816 acc: 0.70475 | v_loss: 1.35163 v_acc: 0.71224 |  iteration: 14565 teacher: 0 sta

batch 949 loss: 1.50814 acc: 0.69694 | v_loss: 1.37223 v_acc: 0.70182 |  iteration: 14623 teacher: 1 stage: sketch lr: 0.000365
batch 950 loss: 1.36947 acc: 0.70931 | v_loss: 1.32576 v_acc: 0.70638 |  iteration: 14624 teacher: 1 stage: sketch lr: 0.000365
batch 951 loss: 1.42670 acc: 0.70117 | v_loss: 1.32330 v_acc: 0.70410 |  iteration: 14625 teacher: 0 stage: sketch lr: 0.000365
batch 952 loss: 1.46339 acc: 0.70020 | v_loss: 1.31958 v_acc: 0.71842 |  iteration: 14626 teacher: 0 stage: sketch lr: 0.000365
batch 953 loss: 1.42094 acc: 0.70736 | v_loss: 1.55310 v_acc: 0.68848 |  iteration: 14627 teacher: 0 stage: sketch lr: 0.000365
batch 954 loss: 1.55428 acc: 0.69564 | v_loss: 1.38920 v_acc: 0.70573 |  iteration: 14628 teacher: 1 stage: sketch lr: 0.000365
batch 955 loss: 1.50865 acc: 0.70312 | v_loss: 1.36482 v_acc: 0.70866 |  iteration: 14629 teacher: 0 stage: sketch lr: 0.000365
batch 956 loss: 1.41972 acc: 0.70378 | v_loss: 1.37921 v_acc: 0.71191 |  iteration: 14630 teacher: 1 sta

batch 1013 loss: 1.40175 acc: 0.71061 | v_loss: 1.25281 v_acc: 0.73503 |  iteration: 14687 teacher: 1 stage: sketch lr: 0.000365
batch 1014 loss: 1.39485 acc: 0.71159 | v_loss: 1.27930 v_acc: 0.72168 |  iteration: 14688 teacher: 0 stage: sketch lr: 0.000365
batch 1015 loss: 1.51174 acc: 0.69141 | v_loss: 1.34588 v_acc: 0.73145 |  iteration: 14689 teacher: 1 stage: sketch lr: 0.000365
batch 1016 loss: 1.43811 acc: 0.70573 | v_loss: 1.26200 v_acc: 0.72461 |  iteration: 14690 teacher: 1 stage: sketch lr: 0.000365
batch 1017 loss: 1.53415 acc: 0.69824 | v_loss: 1.31401 v_acc: 0.72005 |  iteration: 14691 teacher: 0 stage: sketch lr: 0.000365
batch 1018 loss: 1.44091 acc: 0.70443 | v_loss: 1.42107 v_acc: 0.71224 |  iteration: 14692 teacher: 0 stage: sketch lr: 0.000365
batch 1019 loss: 1.44958 acc: 0.70443 | v_loss: 1.39679 v_acc: 0.72070 |  iteration: 14693 teacher: 1 stage: sketch lr: 0.000365
batch 1020 loss: 1.43858 acc: 0.71289 | v_loss: 1.47793 v_acc: 0.69661 |  iteration: 14694 teache

batch 1077 loss: 1.47840 acc: 0.69954 | v_loss: 1.28296 v_acc: 0.72233 |  iteration: 14751 teacher: 1 stage: sketch lr: 0.000364
batch 1078 loss: 1.43132 acc: 0.69466 | v_loss: 1.30317 v_acc: 0.72135 |  iteration: 14752 teacher: 0 stage: sketch lr: 0.000364
batch 1079 loss: 1.43640 acc: 0.70801 | v_loss: 1.48567 v_acc: 0.69303 |  iteration: 14753 teacher: 0 stage: sketch lr: 0.000364
batch 1080 loss: 1.45474 acc: 0.70573 | v_loss: 1.32853 v_acc: 0.71159 |  iteration: 14754 teacher: 0 stage: sketch lr: 0.000364
batch 1081 loss: 1.38969 acc: 0.71224 | v_loss: 1.30679 v_acc: 0.71549 |  iteration: 14755 teacher: 0 stage: sketch lr: 0.000364
batch 1082 loss: 1.44755 acc: 0.69889 | v_loss: 1.28771 v_acc: 0.72070 |  iteration: 14756 teacher: 0 stage: sketch lr: 0.000364
batch 1083 loss: 1.37620 acc: 0.71191 | v_loss: 1.43675 v_acc: 0.70540 |  iteration: 14757 teacher: 0 stage: sketch lr: 0.000364
batch 1084 loss: 1.45449 acc: 0.70052 | v_loss: 1.30749 v_acc: 0.73112 |  iteration: 14758 teache

batch 1141 loss: 1.38314 acc: 0.70768 | v_loss: 1.42799 v_acc: 0.71289 |  iteration: 14815 teacher: 0 stage: sketch lr: 0.000363
batch 1142 loss: 1.48772 acc: 0.69271 | v_loss: 1.28710 v_acc: 0.71875 |  iteration: 14816 teacher: 0 stage: sketch lr: 0.000363
batch 1143 loss: 1.48645 acc: 0.70410 | v_loss: 1.25819 v_acc: 0.72754 |  iteration: 14817 teacher: 1 stage: sketch lr: 0.000363
batch 1144 loss: 1.40527 acc: 0.70996 | v_loss: 1.37008 v_acc: 0.71810 |  iteration: 14818 teacher: 0 stage: sketch lr: 0.000363
batch 1145 loss: 1.47836 acc: 0.69076 | v_loss: 1.42597 v_acc: 0.70182 |  iteration: 14819 teacher: 0 stage: sketch lr: 0.000363
batch 1146 loss: 1.46409 acc: 0.70671 | v_loss: 1.43045 v_acc: 0.70345 |  iteration: 14820 teacher: 1 stage: sketch lr: 0.000363
batch 1147 loss: 1.40467 acc: 0.71094 | v_loss: 1.25467 v_acc: 0.71126 |  iteration: 14821 teacher: 0 stage: sketch lr: 0.000363
batch 1148 loss: 1.43986 acc: 0.70345 | v_loss: 1.39188 v_acc: 0.72689 |  iteration: 14822 teache

batch 1205 loss: 1.45142 acc: 0.70801 | v_loss: 1.25605 v_acc: 0.70996 |  iteration: 14879 teacher: 1 stage: sketch lr: 0.000362
batch 1206 loss: 1.44889 acc: 0.68913 | v_loss: 1.53486 v_acc: 0.69336 |  iteration: 14880 teacher: 0 stage: sketch lr: 0.000362
batch 1207 loss: 1.39416 acc: 0.69564 | v_loss: 1.23185 v_acc: 0.69043 |  iteration: 14881 teacher: 0 stage: sketch lr: 0.000362
batch 1208 loss: 1.44625 acc: 0.69694 | v_loss: 1.32888 v_acc: 0.70247 |  iteration: 14882 teacher: 1 stage: sketch lr: 0.000362
batch 1209 loss: 1.36611 acc: 0.70247 | v_loss: 1.35302 v_acc: 0.69661 |  iteration: 14883 teacher: 0 stage: sketch lr: 0.000362
batch 1210 loss: 1.47728 acc: 0.70247 | v_loss: 1.29957 v_acc: 0.71126 |  iteration: 14884 teacher: 1 stage: sketch lr: 0.000362
batch 1211 loss: 1.38813 acc: 0.70117 | v_loss: 1.36438 v_acc: 0.69759 |  iteration: 14885 teacher: 1 stage: sketch lr: 0.000362
batch 1212 loss: 1.45604 acc: 0.70020 | v_loss: 1.48735 v_acc: 0.71224 |  iteration: 14886 teache

batch 26 loss: 1.46915 acc: 0.70182 | v_loss: 1.36572 v_acc: 0.69336 |  iteration: 14943 teacher: 0 stage: sketch lr: 0.000362
batch 27 loss: 1.42901 acc: 0.69596 | v_loss: 1.28778 v_acc: 0.71191 |  iteration: 14944 teacher: 1 stage: sketch lr: 0.000362
batch 28 loss: 1.41070 acc: 0.71419 | v_loss: 1.35657 v_acc: 0.69629 |  iteration: 14945 teacher: 0 stage: sketch lr: 0.000362
batch 29 loss: 1.31422 acc: 0.70280 | v_loss: 1.48232 v_acc: 0.71517 |  iteration: 14946 teacher: 0 stage: sketch lr: 0.000361
batch 30 loss: 1.54051 acc: 0.69076 | v_loss: 1.31647 v_acc: 0.72689 |  iteration: 14947 teacher: 0 stage: sketch lr: 0.000361
batch 31 loss: 1.59237 acc: 0.68457 | v_loss: 1.44633 v_acc: 0.70443 |  iteration: 14948 teacher: 1 stage: sketch lr: 0.000361
batch 32 loss: 1.52091 acc: 0.69987 | v_loss: 1.36796 v_acc: 0.69922 |  iteration: 14949 teacher: 0 stage: sketch lr: 0.000361
batch 33 loss: 1.35119 acc: 0.70605 | v_loss: 1.31658 v_acc: 0.70898 |  iteration: 14950 teacher: 1 stage: sket

batch 91 loss: 1.34611 acc: 0.70150 | v_loss: 1.42670 v_acc: 0.70410 |  iteration: 15008 teacher: 1 stage: sketch lr: 0.000361
batch 92 loss: 1.42113 acc: 0.70866 | v_loss: 1.41337 v_acc: 0.70768 |  iteration: 15009 teacher: 0 stage: sketch lr: 0.000361
batch 93 loss: 1.34056 acc: 0.71647 | v_loss: 1.39154 v_acc: 0.70475 |  iteration: 15010 teacher: 1 stage: sketch lr: 0.000361
batch 94 loss: 1.41637 acc: 0.70508 | v_loss: 1.25988 v_acc: 0.71354 |  iteration: 15011 teacher: 0 stage: sketch lr: 0.000361
batch 95 loss: 1.44888 acc: 0.70247 | v_loss: 1.32377 v_acc: 0.72461 |  iteration: 15012 teacher: 1 stage: sketch lr: 0.000361
batch 96 loss: 1.40469 acc: 0.71191 | v_loss: 1.17764 v_acc: 0.71517 |  iteration: 15013 teacher: 1 stage: sketch lr: 0.000361
batch 97 loss: 1.56445 acc: 0.69922 | v_loss: 1.34610 v_acc: 0.70703 |  iteration: 15014 teacher: 0 stage: sketch lr: 0.000361
batch 98 loss: 1.43310 acc: 0.70833 | v_loss: 1.50510 v_acc: 0.69987 |  iteration: 15015 teacher: 0 stage: sket

batch 156 loss: 1.36112 acc: 0.71061 | v_loss: 1.43528 v_acc: 0.71647 |  iteration: 15073 teacher: 0 stage: sketch lr: 0.000360
batch 157 loss: 1.37179 acc: 0.70378 | v_loss: 1.66590 v_acc: 0.69303 |  iteration: 15074 teacher: 1 stage: sketch lr: 0.000360
batch 158 loss: 1.43405 acc: 0.69694 | v_loss: 1.52751 v_acc: 0.69857 |  iteration: 15075 teacher: 1 stage: sketch lr: 0.000360
batch 159 loss: 1.52368 acc: 0.69889 | v_loss: 1.29179 v_acc: 0.72396 |  iteration: 15076 teacher: 1 stage: sketch lr: 0.000360
batch 160 loss: 1.39426 acc: 0.70736 | v_loss: 1.37047 v_acc: 0.70931 |  iteration: 15077 teacher: 0 stage: sketch lr: 0.000360
batch 161 loss: 1.29736 acc: 0.72201 | v_loss: 1.22166 v_acc: 0.71973 |  iteration: 15078 teacher: 0 stage: sketch lr: 0.000360
batch 162 loss: 1.33362 acc: 0.70996 | v_loss: 1.42481 v_acc: 0.70182 |  iteration: 15079 teacher: 0 stage: sketch lr: 0.000360
batch 163 loss: 1.41441 acc: 0.69987 | v_loss: 1.36006 v_acc: 0.71810 |  iteration: 15080 teacher: 1 sta

batch 221 loss: 1.26766 acc: 0.71745 | v_loss: 1.36934 v_acc: 0.70182 |  iteration: 15138 teacher: 1 stage: sketch lr: 0.000359
batch 222 loss: 1.42087 acc: 0.70736 | v_loss: 1.33146 v_acc: 0.70638 |  iteration: 15139 teacher: 0 stage: sketch lr: 0.000359
batch 223 loss: 1.48013 acc: 0.70150 | v_loss: 1.33953 v_acc: 0.70117 |  iteration: 15140 teacher: 0 stage: sketch lr: 0.000359
batch 224 loss: 1.31988 acc: 0.70866 | v_loss: 1.33482 v_acc: 0.71484 |  iteration: 15141 teacher: 0 stage: sketch lr: 0.000359
batch 225 loss: 1.56290 acc: 0.69076 | v_loss: 1.54065 v_acc: 0.68978 |  iteration: 15142 teacher: 0 stage: sketch lr: 0.000359
batch 226 loss: 1.42704 acc: 0.69466 | v_loss: 1.38842 v_acc: 0.70443 |  iteration: 15143 teacher: 1 stage: sketch lr: 0.000359
batch 227 loss: 1.53922 acc: 0.69824 | v_loss: 1.34291 v_acc: 0.70898 |  iteration: 15144 teacher: 1 stage: sketch lr: 0.000359
batch 228 loss: 1.41552 acc: 0.70508 | v_loss: 1.37942 v_acc: 0.71582 |  iteration: 15145 teacher: 0 sta

batch 286 loss: 1.44392 acc: 0.70345 | v_loss: 1.27538 v_acc: 0.71908 |  iteration: 15203 teacher: 1 stage: sketch lr: 0.000358
batch 287 loss: 1.37781 acc: 0.71322 | v_loss: 1.34146 v_acc: 0.73112 |  iteration: 15204 teacher: 1 stage: sketch lr: 0.000358
batch 288 loss: 1.32001 acc: 0.72201 | v_loss: 1.25965 v_acc: 0.71973 |  iteration: 15205 teacher: 0 stage: sketch lr: 0.000358
batch 289 loss: 1.40983 acc: 0.71940 | v_loss: 1.29869 v_acc: 0.72266 |  iteration: 15206 teacher: 0 stage: sketch lr: 0.000358
batch 290 loss: 1.40620 acc: 0.71126 | v_loss: 1.40138 v_acc: 0.71419 |  iteration: 15207 teacher: 1 stage: sketch lr: 0.000358
batch 291 loss: 1.36858 acc: 0.70866 | v_loss: 1.37995 v_acc: 0.72103 |  iteration: 15208 teacher: 1 stage: sketch lr: 0.000358
batch 292 loss: 1.45393 acc: 0.70410 | v_loss: 1.48852 v_acc: 0.69661 |  iteration: 15209 teacher: 0 stage: sketch lr: 0.000358
batch 293 loss: 1.52375 acc: 0.69987 | v_loss: 1.40475 v_acc: 0.71745 |  iteration: 15210 teacher: 0 sta

batch 351 loss: 1.45001 acc: 0.70410 | v_loss: 1.47009 v_acc: 0.69206 |  iteration: 15268 teacher: 0 stage: sketch lr: 0.000358
batch 352 loss: 1.60133 acc: 0.69173 | v_loss: 1.32791 v_acc: 0.70931 |  iteration: 15269 teacher: 0 stage: sketch lr: 0.000358
batch 353 loss: 1.57478 acc: 0.68164 | v_loss: 1.30158 v_acc: 0.71452 |  iteration: 15270 teacher: 0 stage: sketch lr: 0.000358
batch 354 loss: 1.52998 acc: 0.69629 | v_loss: 1.29560 v_acc: 0.71940 |  iteration: 15271 teacher: 0 stage: sketch lr: 0.000358
batch 355 loss: 1.43441 acc: 0.70117 | v_loss: 1.43039 v_acc: 0.70475 |  iteration: 15272 teacher: 0 stage: sketch lr: 0.000358
batch 356 loss: 1.50244 acc: 0.69499 | v_loss: 1.30460 v_acc: 0.73210 |  iteration: 15273 teacher: 1 stage: sketch lr: 0.000358
batch 357 loss: 1.45822 acc: 0.70410 | v_loss: 1.52773 v_acc: 0.71582 |  iteration: 15274 teacher: 0 stage: sketch lr: 0.000358
batch 358 loss: 1.49253 acc: 0.69792 | v_loss: 1.28786 v_acc: 0.69759 |  iteration: 15275 teacher: 0 sta

batch 416 loss: 1.49628 acc: 0.69824 | v_loss: 1.36619 v_acc: 0.71582 |  iteration: 15333 teacher: 0 stage: sketch lr: 0.000357
batch 417 loss: 1.31591 acc: 0.72526 | v_loss: 1.41795 v_acc: 0.70410 |  iteration: 15334 teacher: 0 stage: sketch lr: 0.000357
batch 418 loss: 1.33616 acc: 0.71029 | v_loss: 1.41968 v_acc: 0.70508 |  iteration: 15335 teacher: 1 stage: sketch lr: 0.000357
batch 419 loss: 1.39807 acc: 0.70964 | v_loss: 1.22502 v_acc: 0.71712 |  iteration: 15336 teacher: 1 stage: sketch lr: 0.000357
batch 420 loss: 1.39766 acc: 0.70898 | v_loss: 1.39112 v_acc: 0.72917 |  iteration: 15337 teacher: 1 stage: sketch lr: 0.000357
batch 421 loss: 1.36514 acc: 0.70671 | v_loss: 1.48212 v_acc: 0.69759 |  iteration: 15338 teacher: 0 stage: sketch lr: 0.000357
batch 422 loss: 1.38369 acc: 0.71647 | v_loss: 1.43656 v_acc: 0.72201 |  iteration: 15339 teacher: 1 stage: sketch lr: 0.000357
batch 423 loss: 1.46282 acc: 0.70475 | v_loss: 1.24936 v_acc: 0.72233 |  iteration: 15340 teacher: 0 sta

batch 481 loss: 1.49385 acc: 0.69889 | v_loss: 1.36825 v_acc: 0.69368 |  iteration: 15398 teacher: 0 stage: sketch lr: 0.000356
batch 482 loss: 1.37852 acc: 0.70052 | v_loss: 1.28889 v_acc: 0.71842 |  iteration: 15399 teacher: 0 stage: sketch lr: 0.000356
batch 483 loss: 1.39880 acc: 0.69824 | v_loss: 1.36229 v_acc: 0.70215 |  iteration: 15400 teacher: 1 stage: sketch lr: 0.000356
batch 484 loss: 1.40496 acc: 0.70931 | v_loss: 1.48043 v_acc: 0.72005 |  iteration: 15401 teacher: 1 stage: sketch lr: 0.000356
batch 485 loss: 1.35015 acc: 0.71549 | v_loss: 1.31818 v_acc: 0.72689 |  iteration: 15402 teacher: 1 stage: sketch lr: 0.000356
batch 486 loss: 1.38323 acc: 0.69173 | v_loss: 1.45462 v_acc: 0.70443 |  iteration: 15403 teacher: 0 stage: sketch lr: 0.000356
batch 487 loss: 1.38617 acc: 0.69954 | v_loss: 1.35652 v_acc: 0.69922 |  iteration: 15404 teacher: 1 stage: sketch lr: 0.000356
batch 488 loss: 1.61625 acc: 0.69173 | v_loss: 1.32424 v_acc: 0.70898 |  iteration: 15405 teacher: 1 sta

batch 546 loss: 1.35024 acc: 0.70931 | v_loss: 1.43773 v_acc: 0.70085 |  iteration: 15463 teacher: 1 stage: sketch lr: 0.000355
batch 547 loss: 1.46784 acc: 0.70052 | v_loss: 1.41834 v_acc: 0.70931 |  iteration: 15464 teacher: 1 stage: sketch lr: 0.000355
batch 548 loss: 1.61379 acc: 0.68197 | v_loss: 1.39442 v_acc: 0.70703 |  iteration: 15465 teacher: 0 stage: sketch lr: 0.000355
batch 549 loss: 1.38456 acc: 0.70768 | v_loss: 1.25733 v_acc: 0.71875 |  iteration: 15466 teacher: 0 stage: sketch lr: 0.000355
batch 550 loss: 1.37231 acc: 0.71419 | v_loss: 1.31882 v_acc: 0.72331 |  iteration: 15467 teacher: 1 stage: sketch lr: 0.000355
batch 551 loss: 1.42355 acc: 0.71061 | v_loss: 1.19803 v_acc: 0.71517 |  iteration: 15468 teacher: 1 stage: sketch lr: 0.000355
batch 552 loss: 1.44318 acc: 0.69727 | v_loss: 1.34103 v_acc: 0.70703 |  iteration: 15469 teacher: 1 stage: sketch lr: 0.000355
batch 553 loss: 1.37311 acc: 0.70410 | v_loss: 1.49950 v_acc: 0.69987 |  iteration: 15470 teacher: 0 sta

batch 611 loss: 1.40524 acc: 0.70052 | v_loss: 1.43727 v_acc: 0.71647 |  iteration: 15528 teacher: 0 stage: sketch lr: 0.000355
batch 612 loss: 1.42751 acc: 0.69303 | v_loss: 1.66963 v_acc: 0.69271 |  iteration: 15529 teacher: 0 stage: sketch lr: 0.000355
batch 613 loss: 1.52005 acc: 0.68945 | v_loss: 1.52693 v_acc: 0.70117 |  iteration: 15530 teacher: 1 stage: sketch lr: 0.000355
batch 614 loss: 1.44559 acc: 0.70736 | v_loss: 1.29348 v_acc: 0.72363 |  iteration: 15531 teacher: 0 stage: sketch lr: 0.000355
batch 615 loss: 1.44557 acc: 0.69987 | v_loss: 1.38007 v_acc: 0.70410 |  iteration: 15532 teacher: 0 stage: sketch lr: 0.000355
batch 616 loss: 1.44258 acc: 0.70345 | v_loss: 1.23254 v_acc: 0.71973 |  iteration: 15533 teacher: 1 stage: sketch lr: 0.000355
batch 617 loss: 1.40921 acc: 0.70703 | v_loss: 1.42774 v_acc: 0.70150 |  iteration: 15534 teacher: 1 stage: sketch lr: 0.000355
batch 618 loss: 1.44138 acc: 0.70085 | v_loss: 1.35438 v_acc: 0.71094 |  iteration: 15535 teacher: 0 sta

batch 676 loss: 1.46216 acc: 0.70833 | v_loss: 1.36486 v_acc: 0.70020 |  iteration: 15593 teacher: 0 stage: sketch lr: 0.000354
batch 677 loss: 1.45392 acc: 0.70833 | v_loss: 1.32254 v_acc: 0.70312 |  iteration: 15594 teacher: 0 stage: sketch lr: 0.000354
batch 678 loss: 1.57816 acc: 0.68815 | v_loss: 1.32544 v_acc: 0.70020 |  iteration: 15595 teacher: 0 stage: sketch lr: 0.000354
batch 679 loss: 1.46373 acc: 0.69499 | v_loss: 1.32206 v_acc: 0.71484 |  iteration: 15596 teacher: 0 stage: sketch lr: 0.000354
batch 680 loss: 1.43446 acc: 0.69727 | v_loss: 1.53484 v_acc: 0.68978 |  iteration: 15597 teacher: 1 stage: sketch lr: 0.000354
batch 681 loss: 1.45415 acc: 0.70736 | v_loss: 1.38495 v_acc: 0.70443 |  iteration: 15598 teacher: 0 stage: sketch lr: 0.000354
batch 682 loss: 1.47763 acc: 0.70345 | v_loss: 1.34447 v_acc: 0.71029 |  iteration: 15599 teacher: 0 stage: sketch lr: 0.000354
batch 683 loss: 1.38267 acc: 0.71029 | v_loss: 1.38316 v_acc: 0.71680 |  iteration: 15600 teacher: 1 sta

batch 741 loss: 1.46932 acc: 0.71159 | v_loss: 1.26781 v_acc: 0.72461 |  iteration: 15658 teacher: 1 stage: sketch lr: 0.000353
batch 742 loss: 1.42217 acc: 0.70182 | v_loss: 1.35580 v_acc: 0.72689 |  iteration: 15659 teacher: 0 stage: sketch lr: 0.000353
batch 743 loss: 1.49070 acc: 0.69727 | v_loss: 1.26680 v_acc: 0.72461 |  iteration: 15660 teacher: 0 stage: sketch lr: 0.000353
batch 744 loss: 1.48690 acc: 0.70085 | v_loss: 1.31233 v_acc: 0.72005 |  iteration: 15661 teacher: 0 stage: sketch lr: 0.000353
batch 745 loss: 1.42204 acc: 0.70540 | v_loss: 1.42378 v_acc: 0.71289 |  iteration: 15662 teacher: 0 stage: sketch lr: 0.000353
batch 746 loss: 1.50078 acc: 0.70117 | v_loss: 1.38589 v_acc: 0.72298 |  iteration: 15663 teacher: 1 stage: sketch lr: 0.000353
batch 747 loss: 1.56029 acc: 0.69596 | v_loss: 1.47963 v_acc: 0.69922 |  iteration: 15664 teacher: 0 stage: sketch lr: 0.000353
batch 748 loss: 1.50174 acc: 0.69434 | v_loss: 1.39836 v_acc: 0.71973 |  iteration: 15665 teacher: 1 sta

batch 806 loss: 1.45304 acc: 0.70117 | v_loss: 1.45141 v_acc: 0.69303 |  iteration: 15723 teacher: 1 stage: sketch lr: 0.000352
batch 807 loss: 1.39930 acc: 0.70215 | v_loss: 1.34500 v_acc: 0.71452 |  iteration: 15724 teacher: 1 stage: sketch lr: 0.000352
batch 808 loss: 1.36717 acc: 0.70931 | v_loss: 1.28088 v_acc: 0.71777 |  iteration: 15725 teacher: 0 stage: sketch lr: 0.000352
batch 809 loss: 1.50738 acc: 0.70280 | v_loss: 1.27617 v_acc: 0.71484 |  iteration: 15726 teacher: 1 stage: sketch lr: 0.000352
batch 810 loss: 1.42483 acc: 0.70768 | v_loss: 1.43581 v_acc: 0.70540 |  iteration: 15727 teacher: 1 stage: sketch lr: 0.000352
batch 811 loss: 1.33035 acc: 0.71452 | v_loss: 1.31866 v_acc: 0.73047 |  iteration: 15728 teacher: 1 stage: sketch lr: 0.000352
batch 812 loss: 1.34553 acc: 0.72201 | v_loss: 1.56293 v_acc: 0.71615 |  iteration: 15729 teacher: 1 stage: sketch lr: 0.000352
batch 813 loss: 1.43912 acc: 0.70475 | v_loss: 1.28256 v_acc: 0.69759 |  iteration: 15730 teacher: 1 sta

batch 871 loss: 1.44584 acc: 0.70508 | v_loss: 1.37183 v_acc: 0.71940 |  iteration: 15788 teacher: 1 stage: sketch lr: 0.000352
batch 872 loss: 1.35429 acc: 0.70247 | v_loss: 1.41530 v_acc: 0.70345 |  iteration: 15789 teacher: 1 stage: sketch lr: 0.000352
batch 873 loss: 1.47383 acc: 0.69466 | v_loss: 1.41840 v_acc: 0.70410 |  iteration: 15790 teacher: 0 stage: sketch lr: 0.000352
batch 874 loss: 1.41637 acc: 0.69596 | v_loss: 1.22409 v_acc: 0.71712 |  iteration: 15791 teacher: 0 stage: sketch lr: 0.000352
batch 875 loss: 1.40314 acc: 0.70117 | v_loss: 1.38346 v_acc: 0.72786 |  iteration: 15792 teacher: 1 stage: sketch lr: 0.000352
batch 876 loss: 1.47650 acc: 0.69824 | v_loss: 1.46640 v_acc: 0.69792 |  iteration: 15793 teacher: 0 stage: sketch lr: 0.000352
batch 877 loss: 1.46121 acc: 0.69954 | v_loss: 1.41337 v_acc: 0.72233 |  iteration: 15794 teacher: 0 stage: sketch lr: 0.000352
batch 878 loss: 1.44117 acc: 0.70280 | v_loss: 1.25683 v_acc: 0.71810 |  iteration: 15795 teacher: 0 sta

batch 936 loss: 1.43772 acc: 0.70280 | v_loss: 1.36091 v_acc: 0.69336 |  iteration: 15853 teacher: 1 stage: sketch lr: 0.000351
batch 937 loss: 1.58021 acc: 0.69043 | v_loss: 1.29200 v_acc: 0.71191 |  iteration: 15854 teacher: 0 stage: sketch lr: 0.000351
batch 938 loss: 1.50676 acc: 0.69141 | v_loss: 1.36294 v_acc: 0.69629 |  iteration: 15855 teacher: 1 stage: sketch lr: 0.000351
batch 939 loss: 1.43938 acc: 0.70182 | v_loss: 1.46848 v_acc: 0.71126 |  iteration: 15856 teacher: 0 stage: sketch lr: 0.000351
batch 940 loss: 1.33687 acc: 0.70736 | v_loss: 1.31902 v_acc: 0.72689 |  iteration: 15857 teacher: 0 stage: sketch lr: 0.000351
batch 941 loss: 1.38632 acc: 0.70768 | v_loss: 1.44644 v_acc: 0.70443 |  iteration: 15858 teacher: 1 stage: sketch lr: 0.000351
batch 942 loss: 1.46189 acc: 0.70020 | v_loss: 1.35962 v_acc: 0.69922 |  iteration: 15859 teacher: 1 stage: sketch lr: 0.000351
batch 943 loss: 1.51573 acc: 0.69954 | v_loss: 1.31883 v_acc: 0.70898 |  iteration: 15860 teacher: 0 sta

batch 1001 loss: 1.37564 acc: 0.70573 | v_loss: 1.41848 v_acc: 0.70345 |  iteration: 15918 teacher: 0 stage: sketch lr: 0.000350
batch 1002 loss: 1.50094 acc: 0.69629 | v_loss: 1.40428 v_acc: 0.70703 |  iteration: 15919 teacher: 1 stage: sketch lr: 0.000350
batch 1003 loss: 1.39847 acc: 0.70475 | v_loss: 1.39558 v_acc: 0.70443 |  iteration: 15920 teacher: 1 stage: sketch lr: 0.000350
batch 1004 loss: 1.35108 acc: 0.70703 | v_loss: 1.25755 v_acc: 0.71615 |  iteration: 15921 teacher: 0 stage: sketch lr: 0.000350
batch 1005 loss: 1.40428 acc: 0.70345 | v_loss: 1.32220 v_acc: 0.72461 |  iteration: 15922 teacher: 0 stage: sketch lr: 0.000350
batch 1006 loss: 1.35626 acc: 0.71061 | v_loss: 1.17853 v_acc: 0.71517 |  iteration: 15923 teacher: 1 stage: sketch lr: 0.000350
batch 1007 loss: 1.27344 acc: 0.71615 | v_loss: 1.34335 v_acc: 0.70703 |  iteration: 15924 teacher: 1 stage: sketch lr: 0.000350
batch 1008 loss: 1.48680 acc: 0.70052 | v_loss: 1.51496 v_acc: 0.69987 |  iteration: 15925 teache

batch 1065 loss: 1.43131 acc: 0.70247 | v_loss: 1.27507 v_acc: 0.71549 |  iteration: 15982 teacher: 0 stage: sketch lr: 0.000350
batch 1066 loss: 1.38938 acc: 0.70540 | v_loss: 1.48834 v_acc: 0.70215 |  iteration: 15983 teacher: 0 stage: sketch lr: 0.000350
batch 1067 loss: 1.30174 acc: 0.70475 | v_loss: 1.72078 v_acc: 0.68913 |  iteration: 15984 teacher: 0 stage: sketch lr: 0.000350
batch 1068 loss: 1.44041 acc: 0.70215 | v_loss: 1.55504 v_acc: 0.69531 |  iteration: 15985 teacher: 1 stage: sketch lr: 0.000350
batch 1069 loss: 1.46924 acc: 0.70443 | v_loss: 1.29496 v_acc: 0.72396 |  iteration: 15986 teacher: 1 stage: sketch lr: 0.000350
batch 1070 loss: 1.47325 acc: 0.69792 | v_loss: 1.36516 v_acc: 0.70866 |  iteration: 15987 teacher: 1 stage: sketch lr: 0.000350
batch 1071 loss: 1.50425 acc: 0.69564 | v_loss: 1.22181 v_acc: 0.71842 |  iteration: 15988 teacher: 1 stage: sketch lr: 0.000350
batch 1072 loss: 1.34256 acc: 0.71289 | v_loss: 1.40931 v_acc: 0.70247 |  iteration: 15989 teache

batch 1129 loss: 1.38790 acc: 0.71126 | v_loss: 1.46994 v_acc: 0.69629 |  iteration: 16046 teacher: 0 stage: sketch lr: 0.000349
batch 1130 loss: 1.45862 acc: 0.69857 | v_loss: 1.51689 v_acc: 0.69108 |  iteration: 16047 teacher: 0 stage: sketch lr: 0.000349
batch 1131 loss: 1.48110 acc: 0.70443 | v_loss: 1.38299 v_acc: 0.69629 |  iteration: 16048 teacher: 1 stage: sketch lr: 0.000349
batch 1132 loss: 1.39563 acc: 0.71126 | v_loss: 1.31834 v_acc: 0.70378 |  iteration: 16049 teacher: 1 stage: sketch lr: 0.000349
batch 1133 loss: 1.48996 acc: 0.69954 | v_loss: 1.33055 v_acc: 0.70215 |  iteration: 16050 teacher: 0 stage: sketch lr: 0.000349
batch 1134 loss: 1.43938 acc: 0.70182 | v_loss: 1.32677 v_acc: 0.71582 |  iteration: 16051 teacher: 1 stage: sketch lr: 0.000349
batch 1135 loss: 1.32378 acc: 0.71908 | v_loss: 1.54015 v_acc: 0.68978 |  iteration: 16052 teacher: 1 stage: sketch lr: 0.000349
batch 1136 loss: 1.47777 acc: 0.69889 | v_loss: 1.38205 v_acc: 0.70378 |  iteration: 16053 teache

batch 1193 loss: 1.45521 acc: 0.70312 | v_loss: 1.24026 v_acc: 0.71517 |  iteration: 16110 teacher: 0 stage: sketch lr: 0.000348
batch 1194 loss: 1.43266 acc: 0.69792 | v_loss: 1.22959 v_acc: 0.70638 |  iteration: 16111 teacher: 0 stage: sketch lr: 0.000348
batch 1195 loss: 1.59687 acc: 0.69303 | v_loss: 1.24429 v_acc: 0.73796 |  iteration: 16112 teacher: 1 stage: sketch lr: 0.000348
batch 1196 loss: 1.42992 acc: 0.71517 | v_loss: 1.25700 v_acc: 0.72266 |  iteration: 16113 teacher: 0 stage: sketch lr: 0.000348
batch 1197 loss: 1.44958 acc: 0.70540 | v_loss: 1.34307 v_acc: 0.73112 |  iteration: 16114 teacher: 1 stage: sketch lr: 0.000348
batch 1198 loss: 1.44108 acc: 0.70443 | v_loss: 1.25675 v_acc: 0.71940 |  iteration: 16115 teacher: 0 stage: sketch lr: 0.000348
batch 1199 loss: 1.46687 acc: 0.69629 | v_loss: 1.30450 v_acc: 0.72266 |  iteration: 16116 teacher: 0 stage: sketch lr: 0.000348
batch 1200 loss: 1.52555 acc: 0.68750 | v_loss: 1.41990 v_acc: 0.71289 |  iteration: 16117 teache

batch 13 loss: 1.37674 acc: 0.69954 | v_loss: 1.27366 v_acc: 0.72070 |  iteration: 16173 teacher: 1 stage: sketch lr: 0.000348
batch 14 loss: 1.38998 acc: 0.70020 | v_loss: 1.34293 v_acc: 0.73145 |  iteration: 16174 teacher: 1 stage: sketch lr: 0.000348
batch 15 loss: 1.50026 acc: 0.69076 | v_loss: 1.25958 v_acc: 0.72461 |  iteration: 16175 teacher: 0 stage: sketch lr: 0.000347
batch 16 loss: 1.49429 acc: 0.69727 | v_loss: 1.30569 v_acc: 0.72103 |  iteration: 16176 teacher: 0 stage: sketch lr: 0.000347
batch 17 loss: 1.33806 acc: 0.71289 | v_loss: 1.40843 v_acc: 0.71354 |  iteration: 16177 teacher: 1 stage: sketch lr: 0.000347
batch 18 loss: 1.54174 acc: 0.68978 | v_loss: 1.39376 v_acc: 0.72428 |  iteration: 16178 teacher: 0 stage: sketch lr: 0.000347
batch 19 loss: 1.36628 acc: 0.70378 | v_loss: 1.48823 v_acc: 0.70182 |  iteration: 16179 teacher: 1 stage: sketch lr: 0.000347
batch 20 loss: 1.38011 acc: 0.71224 | v_loss: 1.41687 v_acc: 0.71777 |  iteration: 16180 teacher: 0 stage: sket

batch 78 loss: 1.39469 acc: 0.69922 | v_loss: 1.51217 v_acc: 0.69303 |  iteration: 16238 teacher: 1 stage: sketch lr: 0.000347
batch 79 loss: 1.37668 acc: 0.70898 | v_loss: 1.33336 v_acc: 0.71159 |  iteration: 16239 teacher: 1 stage: sketch lr: 0.000347
batch 80 loss: 1.41594 acc: 0.71191 | v_loss: 1.28992 v_acc: 0.71549 |  iteration: 16240 teacher: 1 stage: sketch lr: 0.000347
batch 81 loss: 1.43696 acc: 0.70573 | v_loss: 1.28336 v_acc: 0.71973 |  iteration: 16241 teacher: 1 stage: sketch lr: 0.000347
batch 82 loss: 1.36343 acc: 0.70410 | v_loss: 1.42585 v_acc: 0.70573 |  iteration: 16242 teacher: 0 stage: sketch lr: 0.000347
batch 83 loss: 1.48533 acc: 0.70150 | v_loss: 1.30477 v_acc: 0.73079 |  iteration: 16243 teacher: 1 stage: sketch lr: 0.000347
batch 84 loss: 1.42579 acc: 0.70443 | v_loss: 1.54788 v_acc: 0.71582 |  iteration: 16244 teacher: 1 stage: sketch lr: 0.000347
batch 85 loss: 1.42085 acc: 0.70768 | v_loss: 1.27779 v_acc: 0.70150 |  iteration: 16245 teacher: 0 stage: sket

batch 143 loss: 1.44437 acc: 0.70052 | v_loss: 1.37063 v_acc: 0.71615 |  iteration: 16303 teacher: 1 stage: sketch lr: 0.000346
batch 144 loss: 1.39929 acc: 0.70410 | v_loss: 1.40916 v_acc: 0.70540 |  iteration: 16304 teacher: 0 stage: sketch lr: 0.000346
batch 145 loss: 1.45173 acc: 0.70085 | v_loss: 1.42253 v_acc: 0.70508 |  iteration: 16305 teacher: 1 stage: sketch lr: 0.000346
batch 146 loss: 1.45145 acc: 0.70671 | v_loss: 1.22837 v_acc: 0.71452 |  iteration: 16306 teacher: 1 stage: sketch lr: 0.000346
batch 147 loss: 1.44035 acc: 0.70280 | v_loss: 1.37655 v_acc: 0.72819 |  iteration: 16307 teacher: 1 stage: sketch lr: 0.000346
batch 148 loss: 1.41116 acc: 0.70508 | v_loss: 1.46845 v_acc: 0.69759 |  iteration: 16308 teacher: 1 stage: sketch lr: 0.000346
batch 149 loss: 1.38704 acc: 0.70801 | v_loss: 1.41608 v_acc: 0.72070 |  iteration: 16309 teacher: 0 stage: sketch lr: 0.000346
batch 150 loss: 1.36250 acc: 0.70898 | v_loss: 1.25516 v_acc: 0.72201 |  iteration: 16310 teacher: 1 sta

batch 208 loss: 1.42016 acc: 0.69954 | v_loss: 1.36469 v_acc: 0.69368 |  iteration: 16368 teacher: 0 stage: sketch lr: 0.000345
batch 209 loss: 1.37310 acc: 0.70703 | v_loss: 1.28598 v_acc: 0.71842 |  iteration: 16369 teacher: 1 stage: sketch lr: 0.000345
batch 210 loss: 1.54192 acc: 0.68945 | v_loss: 1.36056 v_acc: 0.70215 |  iteration: 16370 teacher: 1 stage: sketch lr: 0.000345
batch 211 loss: 1.30462 acc: 0.70898 | v_loss: 1.48789 v_acc: 0.72005 |  iteration: 16371 teacher: 0 stage: sketch lr: 0.000345
batch 212 loss: 1.44750 acc: 0.70540 | v_loss: 1.32717 v_acc: 0.72786 |  iteration: 16372 teacher: 0 stage: sketch lr: 0.000345
batch 213 loss: 1.43136 acc: 0.70833 | v_loss: 1.46048 v_acc: 0.70540 |  iteration: 16373 teacher: 1 stage: sketch lr: 0.000345
batch 214 loss: 1.38499 acc: 0.71257 | v_loss: 1.35942 v_acc: 0.69954 |  iteration: 16374 teacher: 1 stage: sketch lr: 0.000345
batch 215 loss: 1.42420 acc: 0.70410 | v_loss: 1.32306 v_acc: 0.70898 |  iteration: 16375 teacher: 1 sta

batch 273 loss: 1.42259 acc: 0.70931 | v_loss: 1.41540 v_acc: 0.70410 |  iteration: 16433 teacher: 1 stage: sketch lr: 0.000345
batch 274 loss: 1.42097 acc: 0.70215 | v_loss: 1.39976 v_acc: 0.70768 |  iteration: 16434 teacher: 1 stage: sketch lr: 0.000345
batch 275 loss: 1.46059 acc: 0.69564 | v_loss: 1.39467 v_acc: 0.70475 |  iteration: 16435 teacher: 0 stage: sketch lr: 0.000345
batch 276 loss: 1.37072 acc: 0.71322 | v_loss: 1.26460 v_acc: 0.71354 |  iteration: 16436 teacher: 1 stage: sketch lr: 0.000345
batch 277 loss: 1.45914 acc: 0.69336 | v_loss: 1.32012 v_acc: 0.72461 |  iteration: 16437 teacher: 0 stage: sketch lr: 0.000345
batch 278 loss: 1.38802 acc: 0.70898 | v_loss: 1.18051 v_acc: 0.71517 |  iteration: 16438 teacher: 1 stage: sketch lr: 0.000345
batch 279 loss: 1.40129 acc: 0.70247 | v_loss: 1.34653 v_acc: 0.70703 |  iteration: 16439 teacher: 0 stage: sketch lr: 0.000345
batch 280 loss: 1.44255 acc: 0.69727 | v_loss: 1.50587 v_acc: 0.69987 |  iteration: 16440 teacher: 1 sta

batch 338 loss: 1.46522 acc: 0.69531 | v_loss: 1.43803 v_acc: 0.71680 |  iteration: 16498 teacher: 1 stage: sketch lr: 0.000344
batch 339 loss: 1.46225 acc: 0.70247 | v_loss: 1.67164 v_acc: 0.69336 |  iteration: 16499 teacher: 1 stage: sketch lr: 0.000344
batch 340 loss: 1.40984 acc: 0.70931 | v_loss: 1.52477 v_acc: 0.69759 |  iteration: 16500 teacher: 0 stage: sketch lr: 0.000344
batch 341 loss: 1.43630 acc: 0.71191 | v_loss: 1.29006 v_acc: 0.72396 |  iteration: 16501 teacher: 0 stage: sketch lr: 0.000344
batch 342 loss: 1.43671 acc: 0.70605 | v_loss: 1.36690 v_acc: 0.70996 |  iteration: 16502 teacher: 0 stage: sketch lr: 0.000344
batch 343 loss: 1.43216 acc: 0.70410 | v_loss: 1.21779 v_acc: 0.72168 |  iteration: 16503 teacher: 0 stage: sketch lr: 0.000344
batch 344 loss: 1.38674 acc: 0.70638 | v_loss: 1.41885 v_acc: 0.70378 |  iteration: 16504 teacher: 1 stage: sketch lr: 0.000344
batch 345 loss: 1.32886 acc: 0.71224 | v_loss: 1.35364 v_acc: 0.71810 |  iteration: 16505 teacher: 0 sta

batch 403 loss: 1.35692 acc: 0.70540 | v_loss: 1.37464 v_acc: 0.70150 |  iteration: 16563 teacher: 1 stage: sketch lr: 0.000343
batch 404 loss: 1.51424 acc: 0.69694 | v_loss: 1.32668 v_acc: 0.70312 |  iteration: 16564 teacher: 1 stage: sketch lr: 0.000343
batch 405 loss: 1.37245 acc: 0.70573 | v_loss: 1.33028 v_acc: 0.70020 |  iteration: 16565 teacher: 0 stage: sketch lr: 0.000343
batch 406 loss: 1.37685 acc: 0.70964 | v_loss: 1.33028 v_acc: 0.71484 |  iteration: 16566 teacher: 1 stage: sketch lr: 0.000343
batch 407 loss: 1.40982 acc: 0.70768 | v_loss: 1.54220 v_acc: 0.68978 |  iteration: 16567 teacher: 0 stage: sketch lr: 0.000343
batch 408 loss: 1.48481 acc: 0.70312 | v_loss: 1.38083 v_acc: 0.70443 |  iteration: 16568 teacher: 0 stage: sketch lr: 0.000343
batch 409 loss: 1.55807 acc: 0.68978 | v_loss: 1.33855 v_acc: 0.71029 |  iteration: 16569 teacher: 0 stage: sketch lr: 0.000343
batch 410 loss: 1.55105 acc: 0.69759 | v_loss: 1.38793 v_acc: 0.71680 |  iteration: 16570 teacher: 1 sta

batch 468 loss: 1.41649 acc: 0.69792 | v_loss: 1.26995 v_acc: 0.71908 |  iteration: 16628 teacher: 0 stage: sketch lr: 0.000343
batch 469 loss: 1.36906 acc: 0.72038 | v_loss: 1.34790 v_acc: 0.73145 |  iteration: 16629 teacher: 0 stage: sketch lr: 0.000343
batch 470 loss: 1.49270 acc: 0.69987 | v_loss: 1.26052 v_acc: 0.72461 |  iteration: 16630 teacher: 1 stage: sketch lr: 0.000343
batch 471 loss: 1.42632 acc: 0.70443 | v_loss: 1.30982 v_acc: 0.72005 |  iteration: 16631 teacher: 1 stage: sketch lr: 0.000343
batch 472 loss: 1.33880 acc: 0.70345 | v_loss: 1.42313 v_acc: 0.71224 |  iteration: 16632 teacher: 0 stage: sketch lr: 0.000343
batch 473 loss: 1.43266 acc: 0.70280 | v_loss: 1.38977 v_acc: 0.72070 |  iteration: 16633 teacher: 1 stage: sketch lr: 0.000343
batch 474 loss: 1.58570 acc: 0.69238 | v_loss: 1.48644 v_acc: 0.69954 |  iteration: 16634 teacher: 1 stage: sketch lr: 0.000343
batch 475 loss: 1.39315 acc: 0.70280 | v_loss: 1.40147 v_acc: 0.71615 |  iteration: 16635 teacher: 0 sta

batch 533 loss: 1.38018 acc: 0.71094 | v_loss: 1.48555 v_acc: 0.69206 |  iteration: 16693 teacher: 0 stage: sketch lr: 0.000342
batch 534 loss: 1.37447 acc: 0.71647 | v_loss: 1.33333 v_acc: 0.70931 |  iteration: 16694 teacher: 1 stage: sketch lr: 0.000342
batch 535 loss: 1.35706 acc: 0.71322 | v_loss: 1.29067 v_acc: 0.71549 |  iteration: 16695 teacher: 1 stage: sketch lr: 0.000342
batch 536 loss: 1.38313 acc: 0.70964 | v_loss: 1.27962 v_acc: 0.71973 |  iteration: 16696 teacher: 0 stage: sketch lr: 0.000342
batch 537 loss: 1.44994 acc: 0.70736 | v_loss: 1.42579 v_acc: 0.70573 |  iteration: 16697 teacher: 0 stage: sketch lr: 0.000342
batch 538 loss: 1.47745 acc: 0.69531 | v_loss: 1.30744 v_acc: 0.73112 |  iteration: 16698 teacher: 1 stage: sketch lr: 0.000342
batch 539 loss: 1.38338 acc: 0.70736 | v_loss: 1.54515 v_acc: 0.71452 |  iteration: 16699 teacher: 0 stage: sketch lr: 0.000342
batch 540 loss: 1.56855 acc: 0.69954 | v_loss: 1.28414 v_acc: 0.69759 |  iteration: 16700 teacher: 0 sta

batch 598 loss: 1.37554 acc: 0.71354 | v_loss: 1.37505 v_acc: 0.71940 |  iteration: 16758 teacher: 0 stage: sketch lr: 0.000341
batch 599 loss: 1.42123 acc: 0.69727 | v_loss: 1.42228 v_acc: 0.70345 |  iteration: 16759 teacher: 0 stage: sketch lr: 0.000341
batch 600 loss: 1.40284 acc: 0.70443 | v_loss: 1.42411 v_acc: 0.70312 |  iteration: 16760 teacher: 0 stage: sketch lr: 0.000341
batch 601 loss: 1.40501 acc: 0.70117 | v_loss: 1.23690 v_acc: 0.71289 |  iteration: 16761 teacher: 0 stage: sketch lr: 0.000341
batch 602 loss: 1.48379 acc: 0.69824 | v_loss: 1.40108 v_acc: 0.72656 |  iteration: 16762 teacher: 0 stage: sketch lr: 0.000341
batch 603 loss: 1.45046 acc: 0.70150 | v_loss: 1.47776 v_acc: 0.69889 |  iteration: 16763 teacher: 1 stage: sketch lr: 0.000341
batch 604 loss: 1.31171 acc: 0.70996 | v_loss: 1.40488 v_acc: 0.72103 |  iteration: 16764 teacher: 1 stage: sketch lr: 0.000341
batch 605 loss: 1.46815 acc: 0.69792 | v_loss: 1.24356 v_acc: 0.71908 |  iteration: 16765 teacher: 1 sta

batch 663 loss: 1.40945 acc: 0.70996 | v_loss: 1.36225 v_acc: 0.69368 |  iteration: 16823 teacher: 0 stage: sketch lr: 0.000341
batch 664 loss: 1.37315 acc: 0.71712 | v_loss: 1.28829 v_acc: 0.71810 |  iteration: 16824 teacher: 0 stage: sketch lr: 0.000341
batch 665 loss: 1.42153 acc: 0.71061 | v_loss: 1.35929 v_acc: 0.69629 |  iteration: 16825 teacher: 0 stage: sketch lr: 0.000341
batch 666 loss: 1.37674 acc: 0.71289 | v_loss: 1.47371 v_acc: 0.71517 |  iteration: 16826 teacher: 0 stage: sketch lr: 0.000341
batch 667 loss: 1.41685 acc: 0.70736 | v_loss: 1.31842 v_acc: 0.72689 |  iteration: 16827 teacher: 0 stage: sketch lr: 0.000341
batch 668 loss: 1.48844 acc: 0.69727 | v_loss: 1.44805 v_acc: 0.70443 |  iteration: 16828 teacher: 0 stage: sketch lr: 0.000341
batch 669 loss: 1.43661 acc: 0.69889 | v_loss: 1.36000 v_acc: 0.69889 |  iteration: 16829 teacher: 1 stage: sketch lr: 0.000341
batch 670 loss: 1.40206 acc: 0.70638 | v_loss: 1.32948 v_acc: 0.70866 |  iteration: 16830 teacher: 1 sta

batch 728 loss: 1.42841 acc: 0.70150 | v_loss: 1.42727 v_acc: 0.70215 |  iteration: 16888 teacher: 0 stage: sketch lr: 0.000340
batch 729 loss: 1.41277 acc: 0.70215 | v_loss: 1.41111 v_acc: 0.70931 |  iteration: 16889 teacher: 0 stage: sketch lr: 0.000340
batch 730 loss: 1.35026 acc: 0.71387 | v_loss: 1.40497 v_acc: 0.70703 |  iteration: 16890 teacher: 0 stage: sketch lr: 0.000340
batch 731 loss: 1.40257 acc: 0.70638 | v_loss: 1.25985 v_acc: 0.71875 |  iteration: 16891 teacher: 1 stage: sketch lr: 0.000340
batch 732 loss: 1.39862 acc: 0.70247 | v_loss: 1.31699 v_acc: 0.72331 |  iteration: 16892 teacher: 0 stage: sketch lr: 0.000340
batch 733 loss: 1.42591 acc: 0.70182 | v_loss: 1.17255 v_acc: 0.71615 |  iteration: 16893 teacher: 1 stage: sketch lr: 0.000340
batch 734 loss: 1.31422 acc: 0.70768 | v_loss: 1.34308 v_acc: 0.71126 |  iteration: 16894 teacher: 0 stage: sketch lr: 0.000340
batch 735 loss: 1.45084 acc: 0.69759 | v_loss: 1.50995 v_acc: 0.69954 |  iteration: 16895 teacher: 0 sta

batch 793 loss: 1.51494 acc: 0.69076 | v_loss: 1.44308 v_acc: 0.71582 |  iteration: 16953 teacher: 0 stage: sketch lr: 0.000339
batch 794 loss: 1.52803 acc: 0.68978 | v_loss: 1.64866 v_acc: 0.69401 |  iteration: 16954 teacher: 0 stage: sketch lr: 0.000339
batch 795 loss: 1.44886 acc: 0.70508 | v_loss: 1.51249 v_acc: 0.69922 |  iteration: 16955 teacher: 1 stage: sketch lr: 0.000339
batch 796 loss: 1.51456 acc: 0.69661 | v_loss: 1.29002 v_acc: 0.72135 |  iteration: 16956 teacher: 0 stage: sketch lr: 0.000339
batch 797 loss: 1.40122 acc: 0.69889 | v_loss: 1.36555 v_acc: 0.70410 |  iteration: 16957 teacher: 0 stage: sketch lr: 0.000339
batch 798 loss: 1.48673 acc: 0.69206 | v_loss: 1.22135 v_acc: 0.71973 |  iteration: 16958 teacher: 0 stage: sketch lr: 0.000339
batch 799 loss: 1.39296 acc: 0.70345 | v_loss: 1.41262 v_acc: 0.70150 |  iteration: 16959 teacher: 0 stage: sketch lr: 0.000339
batch 800 loss: 1.50081 acc: 0.69922 | v_loss: 1.36037 v_acc: 0.71159 |  iteration: 16960 teacher: 0 sta

batch 858 loss: 1.35712 acc: 0.70671 | v_loss: 1.37010 v_acc: 0.70182 |  iteration: 17018 teacher: 1 stage: sketch lr: 0.000339
batch 859 loss: 1.42861 acc: 0.70540 | v_loss: 1.31483 v_acc: 0.70638 |  iteration: 17019 teacher: 1 stage: sketch lr: 0.000339
batch 860 loss: 1.39608 acc: 0.70833 | v_loss: 1.32462 v_acc: 0.70410 |  iteration: 17020 teacher: 0 stage: sketch lr: 0.000339
batch 861 loss: 1.49961 acc: 0.69564 | v_loss: 1.33105 v_acc: 0.71842 |  iteration: 17021 teacher: 1 stage: sketch lr: 0.000339
batch 862 loss: 1.43319 acc: 0.69759 | v_loss: 1.57610 v_acc: 0.68978 |  iteration: 17022 teacher: 1 stage: sketch lr: 0.000339
batch 863 loss: 1.46692 acc: 0.69889 | v_loss: 1.39219 v_acc: 0.71257 |  iteration: 17023 teacher: 1 stage: sketch lr: 0.000339
batch 864 loss: 1.38050 acc: 0.70443 | v_loss: 1.33274 v_acc: 0.70866 |  iteration: 17024 teacher: 1 stage: sketch lr: 0.000339
batch 865 loss: 1.50261 acc: 0.68848 | v_loss: 1.38988 v_acc: 0.71452 |  iteration: 17025 teacher: 1 sta

batch 923 loss: 1.46953 acc: 0.69694 | v_loss: 1.26109 v_acc: 0.72168 |  iteration: 17083 teacher: 0 stage: sketch lr: 0.000338
batch 924 loss: 1.41654 acc: 0.69922 | v_loss: 1.34655 v_acc: 0.73145 |  iteration: 17084 teacher: 0 stage: sketch lr: 0.000338
batch 925 loss: 1.37933 acc: 0.71842 | v_loss: 1.26434 v_acc: 0.72070 |  iteration: 17085 teacher: 0 stage: sketch lr: 0.000338
batch 926 loss: 1.42029 acc: 0.70410 | v_loss: 1.31526 v_acc: 0.71419 |  iteration: 17086 teacher: 0 stage: sketch lr: 0.000338
batch 927 loss: 1.46136 acc: 0.70996 | v_loss: 1.42621 v_acc: 0.71289 |  iteration: 17087 teacher: 1 stage: sketch lr: 0.000338
batch 928 loss: 1.42117 acc: 0.69954 | v_loss: 1.40620 v_acc: 0.71680 |  iteration: 17088 teacher: 1 stage: sketch lr: 0.000338
batch 929 loss: 1.46964 acc: 0.70150 | v_loss: 1.49038 v_acc: 0.69792 |  iteration: 17089 teacher: 0 stage: sketch lr: 0.000338
batch 930 loss: 1.37038 acc: 0.71289 | v_loss: 1.42578 v_acc: 0.71973 |  iteration: 17090 teacher: 0 sta

batch 988 loss: 1.39594 acc: 0.70638 | v_loss: 1.49231 v_acc: 0.69271 |  iteration: 17148 teacher: 0 stage: sketch lr: 0.000337
batch 989 loss: 1.41317 acc: 0.71452 | v_loss: 1.32563 v_acc: 0.71419 |  iteration: 17149 teacher: 1 stage: sketch lr: 0.000337
batch 990 loss: 1.42464 acc: 0.69434 | v_loss: 1.29708 v_acc: 0.71777 |  iteration: 17150 teacher: 1 stage: sketch lr: 0.000337
batch 991 loss: 1.49602 acc: 0.69076 | v_loss: 1.29234 v_acc: 0.71484 |  iteration: 17151 teacher: 1 stage: sketch lr: 0.000337
batch 992 loss: 1.43513 acc: 0.69661 | v_loss: 1.43508 v_acc: 0.70508 |  iteration: 17152 teacher: 1 stage: sketch lr: 0.000337
batch 993 loss: 1.28773 acc: 0.70443 | v_loss: 1.31383 v_acc: 0.73047 |  iteration: 17153 teacher: 0 stage: sketch lr: 0.000337
batch 994 loss: 1.54049 acc: 0.69694 | v_loss: 1.55671 v_acc: 0.71582 |  iteration: 17154 teacher: 0 stage: sketch lr: 0.000337
batch 995 loss: 1.40387 acc: 0.69661 | v_loss: 1.27781 v_acc: 0.69922 |  iteration: 17155 teacher: 1 sta

batch 1052 loss: 1.48730 acc: 0.69531 | v_loss: 1.25004 v_acc: 0.72591 |  iteration: 17212 teacher: 1 stage: sketch lr: 0.000337
batch 1053 loss: 1.40388 acc: 0.71615 | v_loss: 1.37131 v_acc: 0.71940 |  iteration: 17213 teacher: 0 stage: sketch lr: 0.000337
batch 1054 loss: 1.46892 acc: 0.70215 | v_loss: 1.41960 v_acc: 0.70345 |  iteration: 17214 teacher: 1 stage: sketch lr: 0.000337
batch 1055 loss: 1.42657 acc: 0.71029 | v_loss: 1.42614 v_acc: 0.70410 |  iteration: 17215 teacher: 1 stage: sketch lr: 0.000337
batch 1056 loss: 1.43287 acc: 0.70117 | v_loss: 1.22637 v_acc: 0.71712 |  iteration: 17216 teacher: 1 stage: sketch lr: 0.000337
batch 1057 loss: 1.33673 acc: 0.70475 | v_loss: 1.38732 v_acc: 0.72786 |  iteration: 17217 teacher: 0 stage: sketch lr: 0.000337
batch 1058 loss: 1.36883 acc: 0.71517 | v_loss: 1.47479 v_acc: 0.69792 |  iteration: 17218 teacher: 1 stage: sketch lr: 0.000337
batch 1059 loss: 1.43155 acc: 0.69727 | v_loss: 1.41854 v_acc: 0.72070 |  iteration: 17219 teache

batch 1116 loss: 1.49764 acc: 0.70052 | v_loss: 1.21345 v_acc: 0.70833 |  iteration: 17276 teacher: 0 stage: sketch lr: 0.000336
batch 1117 loss: 1.48333 acc: 0.69922 | v_loss: 1.32902 v_acc: 0.71159 |  iteration: 17277 teacher: 0 stage: sketch lr: 0.000336
batch 1118 loss: 1.47581 acc: 0.70052 | v_loss: 1.36704 v_acc: 0.69336 |  iteration: 17278 teacher: 0 stage: sketch lr: 0.000336
batch 1119 loss: 1.48412 acc: 0.70736 | v_loss: 1.29528 v_acc: 0.71191 |  iteration: 17279 teacher: 0 stage: sketch lr: 0.000336
batch 1120 loss: 1.50037 acc: 0.70703 | v_loss: 1.37145 v_acc: 0.69629 |  iteration: 17280 teacher: 1 stage: sketch lr: 0.000336
batch 1121 loss: 1.35795 acc: 0.70605 | v_loss: 1.47088 v_acc: 0.71517 |  iteration: 17281 teacher: 1 stage: sketch lr: 0.000336
batch 1122 loss: 1.47919 acc: 0.69629 | v_loss: 1.31713 v_acc: 0.72689 |  iteration: 17282 teacher: 1 stage: sketch lr: 0.000336
batch 1123 loss: 1.39617 acc: 0.71680 | v_loss: 1.43862 v_acc: 0.70508 |  iteration: 17283 teache

batch 1180 loss: 1.42673 acc: 0.71257 | v_loss: 1.47142 v_acc: 0.70768 |  iteration: 17340 teacher: 0 stage: sketch lr: 0.000336
batch 1181 loss: 1.45869 acc: 0.69922 | v_loss: 1.51209 v_acc: 0.68815 |  iteration: 17341 teacher: 1 stage: sketch lr: 0.000336
batch 1182 loss: 1.44035 acc: 0.71061 | v_loss: 1.45843 v_acc: 0.70378 |  iteration: 17342 teacher: 0 stage: sketch lr: 0.000336
batch 1183 loss: 1.45612 acc: 0.69857 | v_loss: 1.42638 v_acc: 0.71029 |  iteration: 17343 teacher: 1 stage: sketch lr: 0.000336
batch 1184 loss: 1.41794 acc: 0.70703 | v_loss: 1.41232 v_acc: 0.70605 |  iteration: 17344 teacher: 0 stage: sketch lr: 0.000336
batch 1185 loss: 1.41774 acc: 0.70247 | v_loss: 1.40681 v_acc: 0.70703 |  iteration: 17345 teacher: 1 stage: sketch lr: 0.000336
batch 1186 loss: 1.50040 acc: 0.70052 | v_loss: 1.26596 v_acc: 0.71159 |  iteration: 17346 teacher: 1 stage: sketch lr: 0.000336
batch 1187 loss: 1.40372 acc: 0.70475 | v_loss: 1.31660 v_acc: 0.72331 |  iteration: 17347 teache

epoch: 14
__________________________________________
batch 0 loss: 1.35002 acc: 0.70703 | v_loss: 1.41850 v_acc: 0.69759 |  iteration: 17403 teacher: 1 stage: sketch lr: 0.000335
batch 1 loss: 1.48638 acc: 0.69824 | v_loss: 1.40772 v_acc: 0.70996 |  iteration: 17404 teacher: 1 stage: sketch lr: 0.000335
batch 2 loss: 1.39398 acc: 0.69499 | v_loss: 1.39771 v_acc: 0.70703 |  iteration: 17405 teacher: 0 stage: sketch lr: 0.000335
batch 3 loss: 1.52716 acc: 0.69531 | v_loss: 1.25114 v_acc: 0.71875 |  iteration: 17406 teacher: 0 stage: sketch lr: 0.000335
batch 4 loss: 1.42035 acc: 0.70215 | v_loss: 1.33256 v_acc: 0.72331 |  iteration: 17407 teacher: 1 stage: sketch lr: 0.000335
batch 5 loss: 1.52269 acc: 0.70573 | v_loss: 1.16731 v_acc: 0.71615 |  iteration: 17408 teacher: 1 stage: sketch lr: 0.000335
batch 6 loss: 1.47122 acc: 0.69987 | v_loss: 1.33904 v_acc: 0.71061 |  iteration: 17409 teacher: 1 stage: sketch lr: 0.000335
batch 7 loss: 1.45861 acc: 0.69759 | v_loss: 1.50977 v_acc: 0.698

batch 65 loss: 1.38098 acc: 0.70280 | v_loss: 1.41596 v_acc: 0.71777 |  iteration: 17468 teacher: 1 stage: sketch lr: 0.000334
batch 66 loss: 1.49071 acc: 0.70866 | v_loss: 1.67760 v_acc: 0.69303 |  iteration: 17469 teacher: 1 stage: sketch lr: 0.000334
batch 67 loss: 1.37946 acc: 0.72168 | v_loss: 1.52687 v_acc: 0.70117 |  iteration: 17470 teacher: 0 stage: sketch lr: 0.000334
batch 68 loss: 1.39160 acc: 0.70475 | v_loss: 1.29562 v_acc: 0.72363 |  iteration: 17471 teacher: 0 stage: sketch lr: 0.000334
batch 69 loss: 1.52849 acc: 0.69173 | v_loss: 1.37967 v_acc: 0.70410 |  iteration: 17472 teacher: 1 stage: sketch lr: 0.000334
batch 70 loss: 1.28244 acc: 0.71973 | v_loss: 1.21407 v_acc: 0.71973 |  iteration: 17473 teacher: 0 stage: sketch lr: 0.000334
batch 71 loss: 1.50521 acc: 0.69857 | v_loss: 1.43271 v_acc: 0.70150 |  iteration: 17474 teacher: 0 stage: sketch lr: 0.000334
batch 72 loss: 1.39317 acc: 0.71191 | v_loss: 1.35607 v_acc: 0.71842 |  iteration: 17475 teacher: 0 stage: sket

batch 130 loss: 1.50348 acc: 0.70150 | v_loss: 1.37129 v_acc: 0.70020 |  iteration: 17533 teacher: 1 stage: sketch lr: 0.000334
batch 131 loss: 1.52718 acc: 0.68620 | v_loss: 1.32684 v_acc: 0.70312 |  iteration: 17534 teacher: 1 stage: sketch lr: 0.000334
batch 132 loss: 1.45216 acc: 0.70312 | v_loss: 1.33159 v_acc: 0.70085 |  iteration: 17535 teacher: 1 stage: sketch lr: 0.000334
batch 133 loss: 1.44525 acc: 0.69922 | v_loss: 1.32421 v_acc: 0.71745 |  iteration: 17536 teacher: 1 stage: sketch lr: 0.000334
batch 134 loss: 1.37070 acc: 0.71647 | v_loss: 1.52096 v_acc: 0.69173 |  iteration: 17537 teacher: 1 stage: sketch lr: 0.000334
batch 135 loss: 1.37041 acc: 0.70443 | v_loss: 1.37413 v_acc: 0.70378 |  iteration: 17538 teacher: 0 stage: sketch lr: 0.000334
batch 136 loss: 1.43129 acc: 0.69954 | v_loss: 1.34550 v_acc: 0.70833 |  iteration: 17539 teacher: 0 stage: sketch lr: 0.000334
batch 137 loss: 1.41922 acc: 0.70638 | v_loss: 1.38447 v_acc: 0.71680 |  iteration: 17540 teacher: 0 sta

batch 195 loss: 1.43555 acc: 0.69434 | v_loss: 1.26728 v_acc: 0.71842 |  iteration: 17598 teacher: 0 stage: sketch lr: 0.000333
batch 196 loss: 1.43374 acc: 0.70280 | v_loss: 1.32136 v_acc: 0.73145 |  iteration: 17599 teacher: 0 stage: sketch lr: 0.000333
batch 197 loss: 1.33111 acc: 0.71094 | v_loss: 1.26193 v_acc: 0.72461 |  iteration: 17600 teacher: 0 stage: sketch lr: 0.000333
batch 198 loss: 1.47579 acc: 0.69629 | v_loss: 1.29928 v_acc: 0.72005 |  iteration: 17601 teacher: 1 stage: sketch lr: 0.000333
batch 199 loss: 1.39401 acc: 0.70345 | v_loss: 1.40947 v_acc: 0.71224 |  iteration: 17602 teacher: 0 stage: sketch lr: 0.000333
batch 200 loss: 1.35270 acc: 0.71126 | v_loss: 1.39410 v_acc: 0.72070 |  iteration: 17603 teacher: 1 stage: sketch lr: 0.000333
batch 201 loss: 1.46409 acc: 0.70703 | v_loss: 1.49008 v_acc: 0.69954 |  iteration: 17604 teacher: 1 stage: sketch lr: 0.000333
batch 202 loss: 1.45308 acc: 0.69596 | v_loss: 1.43093 v_acc: 0.71615 |  iteration: 17605 teacher: 1 sta

batch 260 loss: 1.38843 acc: 0.71029 | v_loss: 1.48592 v_acc: 0.69043 |  iteration: 17663 teacher: 0 stage: sketch lr: 0.000333
batch 261 loss: 1.37085 acc: 0.69824 | v_loss: 1.33389 v_acc: 0.70996 |  iteration: 17664 teacher: 0 stage: sketch lr: 0.000333
batch 262 loss: 1.47146 acc: 0.69987 | v_loss: 1.29494 v_acc: 0.71549 |  iteration: 17665 teacher: 1 stage: sketch lr: 0.000333
batch 263 loss: 1.34447 acc: 0.70475 | v_loss: 1.29300 v_acc: 0.71973 |  iteration: 17666 teacher: 1 stage: sketch lr: 0.000333
batch 264 loss: 1.31034 acc: 0.72005 | v_loss: 1.42310 v_acc: 0.70573 |  iteration: 17667 teacher: 1 stage: sketch lr: 0.000332
batch 265 loss: 1.38776 acc: 0.70443 | v_loss: 1.30534 v_acc: 0.73112 |  iteration: 17668 teacher: 1 stage: sketch lr: 0.000332
batch 266 loss: 1.43019 acc: 0.69922 | v_loss: 1.53930 v_acc: 0.71452 |  iteration: 17669 teacher: 1 stage: sketch lr: 0.000332
batch 267 loss: 1.45432 acc: 0.70247 | v_loss: 1.27957 v_acc: 0.69759 |  iteration: 17670 teacher: 0 sta

batch 325 loss: 1.53365 acc: 0.69727 | v_loss: 1.38237 v_acc: 0.72233 |  iteration: 17728 teacher: 0 stage: sketch lr: 0.000332
batch 326 loss: 1.32714 acc: 0.71940 | v_loss: 1.41861 v_acc: 0.70410 |  iteration: 17729 teacher: 0 stage: sketch lr: 0.000332
batch 327 loss: 1.44082 acc: 0.70312 | v_loss: 1.42177 v_acc: 0.70833 |  iteration: 17730 teacher: 1 stage: sketch lr: 0.000332
batch 328 loss: 1.50702 acc: 0.69531 | v_loss: 1.22016 v_acc: 0.72591 |  iteration: 17731 teacher: 0 stage: sketch lr: 0.000332
batch 329 loss: 1.44529 acc: 0.70378 | v_loss: 1.39722 v_acc: 0.73112 |  iteration: 17732 teacher: 0 stage: sketch lr: 0.000332
batch 330 loss: 1.44549 acc: 0.69596 | v_loss: 1.48068 v_acc: 0.69857 |  iteration: 17733 teacher: 1 stage: sketch lr: 0.000332
batch 331 loss: 1.40913 acc: 0.69954 | v_loss: 1.42377 v_acc: 0.72070 |  iteration: 17734 teacher: 1 stage: sketch lr: 0.000332
batch 332 loss: 1.40453 acc: 0.70345 | v_loss: 1.24629 v_acc: 0.72201 |  iteration: 17735 teacher: 1 sta

batch 390 loss: 1.38449 acc: 0.69824 | v_loss: 1.35926 v_acc: 0.69499 |  iteration: 17793 teacher: 1 stage: sketch lr: 0.000331
batch 391 loss: 1.46927 acc: 0.70052 | v_loss: 1.30245 v_acc: 0.70996 |  iteration: 17794 teacher: 1 stage: sketch lr: 0.000331
batch 392 loss: 1.45579 acc: 0.69271 | v_loss: 1.36044 v_acc: 0.69434 |  iteration: 17795 teacher: 1 stage: sketch lr: 0.000331
batch 393 loss: 1.37438 acc: 0.70247 | v_loss: 1.47153 v_acc: 0.71224 |  iteration: 17796 teacher: 0 stage: sketch lr: 0.000331
batch 394 loss: 1.41402 acc: 0.69727 | v_loss: 1.31729 v_acc: 0.72786 |  iteration: 17797 teacher: 0 stage: sketch lr: 0.000331
batch 395 loss: 1.35082 acc: 0.70898 | v_loss: 1.44426 v_acc: 0.70540 |  iteration: 17798 teacher: 0 stage: sketch lr: 0.000331
batch 396 loss: 1.50709 acc: 0.69173 | v_loss: 1.37696 v_acc: 0.69857 |  iteration: 17799 teacher: 0 stage: sketch lr: 0.000331
batch 397 loss: 1.39113 acc: 0.71549 | v_loss: 1.31283 v_acc: 0.70833 |  iteration: 17800 teacher: 0 sta

batch 455 loss: 1.48511 acc: 0.69401 | v_loss: 1.41123 v_acc: 0.70736 |  iteration: 17858 teacher: 1 stage: sketch lr: 0.000331
batch 456 loss: 1.36650 acc: 0.70443 | v_loss: 1.39311 v_acc: 0.70703 |  iteration: 17859 teacher: 0 stage: sketch lr: 0.000331
batch 457 loss: 1.39233 acc: 0.71061 | v_loss: 1.42169 v_acc: 0.70443 |  iteration: 17860 teacher: 0 stage: sketch lr: 0.000331
batch 458 loss: 1.33968 acc: 0.70215 | v_loss: 1.27635 v_acc: 0.71159 |  iteration: 17861 teacher: 0 stage: sketch lr: 0.000331
batch 459 loss: 1.46998 acc: 0.69824 | v_loss: 1.31030 v_acc: 0.72233 |  iteration: 17862 teacher: 0 stage: sketch lr: 0.000331
batch 460 loss: 1.43316 acc: 0.70085 | v_loss: 1.18360 v_acc: 0.71615 |  iteration: 17863 teacher: 0 stage: sketch lr: 0.000331
batch 461 loss: 1.42769 acc: 0.70443 | v_loss: 1.34448 v_acc: 0.71061 |  iteration: 17864 teacher: 0 stage: sketch lr: 0.000331
batch 462 loss: 1.51017 acc: 0.69531 | v_loss: 1.52466 v_acc: 0.69466 |  iteration: 17865 teacher: 0 sta

batch 520 loss: 1.44988 acc: 0.69661 | v_loss: 1.49370 v_acc: 0.70475 |  iteration: 17923 teacher: 0 stage: sketch lr: 0.000330
batch 521 loss: 1.44159 acc: 0.69466 | v_loss: 1.70035 v_acc: 0.68913 |  iteration: 17924 teacher: 1 stage: sketch lr: 0.000330
batch 522 loss: 1.42884 acc: 0.69987 | v_loss: 1.54565 v_acc: 0.69531 |  iteration: 17925 teacher: 1 stage: sketch lr: 0.000330
batch 523 loss: 1.39297 acc: 0.69694 | v_loss: 1.29607 v_acc: 0.72526 |  iteration: 17926 teacher: 0 stage: sketch lr: 0.000330
batch 524 loss: 1.40889 acc: 0.70508 | v_loss: 1.36288 v_acc: 0.70964 |  iteration: 17927 teacher: 0 stage: sketch lr: 0.000330
batch 525 loss: 1.45292 acc: 0.69987 | v_loss: 1.21242 v_acc: 0.72103 |  iteration: 17928 teacher: 0 stage: sketch lr: 0.000330
batch 526 loss: 1.44678 acc: 0.69466 | v_loss: 1.40440 v_acc: 0.70410 |  iteration: 17929 teacher: 0 stage: sketch lr: 0.000330
batch 527 loss: 1.36703 acc: 0.70410 | v_loss: 1.35379 v_acc: 0.71810 |  iteration: 17930 teacher: 1 sta

batch 585 loss: 1.53276 acc: 0.68490 | v_loss: 1.37935 v_acc: 0.70182 |  iteration: 17988 teacher: 1 stage: sketch lr: 0.000330
batch 586 loss: 1.45138 acc: 0.70215 | v_loss: 1.31964 v_acc: 0.70638 |  iteration: 17989 teacher: 1 stage: sketch lr: 0.000330
batch 587 loss: 1.34055 acc: 0.71647 | v_loss: 1.33214 v_acc: 0.70410 |  iteration: 17990 teacher: 1 stage: sketch lr: 0.000329
batch 588 loss: 1.42844 acc: 0.70638 | v_loss: 1.33209 v_acc: 0.71842 |  iteration: 17991 teacher: 0 stage: sketch lr: 0.000329
batch 589 loss: 1.39708 acc: 0.70573 | v_loss: 1.53031 v_acc: 0.69271 |  iteration: 17992 teacher: 0 stage: sketch lr: 0.000329
batch 590 loss: 1.49820 acc: 0.70280 | v_loss: 1.37327 v_acc: 0.70443 |  iteration: 17993 teacher: 0 stage: sketch lr: 0.000329
batch 591 loss: 1.43359 acc: 0.69596 | v_loss: 1.35081 v_acc: 0.71029 |  iteration: 17994 teacher: 1 stage: sketch lr: 0.000329
batch 592 loss: 1.43159 acc: 0.70833 | v_loss: 1.39325 v_acc: 0.71680 |  iteration: 17995 teacher: 0 sta

batch 650 loss: 1.37147 acc: 0.70605 | v_loss: 1.26189 v_acc: 0.72461 |  iteration: 18053 teacher: 1 stage: sketch lr: 0.000329
batch 651 loss: 1.35195 acc: 0.70833 | v_loss: 1.33996 v_acc: 0.72689 |  iteration: 18054 teacher: 1 stage: sketch lr: 0.000329
batch 652 loss: 1.31819 acc: 0.70931 | v_loss: 1.26102 v_acc: 0.72852 |  iteration: 18055 teacher: 0 stage: sketch lr: 0.000329
batch 653 loss: 1.45538 acc: 0.70410 | v_loss: 1.30578 v_acc: 0.72038 |  iteration: 18056 teacher: 0 stage: sketch lr: 0.000329
batch 654 loss: 1.36040 acc: 0.71159 | v_loss: 1.42423 v_acc: 0.71224 |  iteration: 18057 teacher: 1 stage: sketch lr: 0.000329
batch 655 loss: 1.44289 acc: 0.70573 | v_loss: 1.39643 v_acc: 0.72070 |  iteration: 18058 teacher: 0 stage: sketch lr: 0.000329
batch 656 loss: 1.39006 acc: 0.70182 | v_loss: 1.48414 v_acc: 0.69954 |  iteration: 18059 teacher: 0 stage: sketch lr: 0.000329
batch 657 loss: 1.28597 acc: 0.71029 | v_loss: 1.41889 v_acc: 0.71615 |  iteration: 18060 teacher: 0 sta

batch 715 loss: 1.43579 acc: 0.70215 | v_loss: 1.46841 v_acc: 0.69303 |  iteration: 18118 teacher: 0 stage: sketch lr: 0.000328
batch 716 loss: 1.41508 acc: 0.69531 | v_loss: 1.33645 v_acc: 0.71159 |  iteration: 18119 teacher: 1 stage: sketch lr: 0.000328
batch 717 loss: 1.49571 acc: 0.70247 | v_loss: 1.28396 v_acc: 0.71549 |  iteration: 18120 teacher: 0 stage: sketch lr: 0.000328
batch 718 loss: 1.39015 acc: 0.70475 | v_loss: 1.28042 v_acc: 0.71973 |  iteration: 18121 teacher: 0 stage: sketch lr: 0.000328
batch 719 loss: 1.38614 acc: 0.70378 | v_loss: 1.42498 v_acc: 0.70573 |  iteration: 18122 teacher: 0 stage: sketch lr: 0.000328
batch 720 loss: 1.48878 acc: 0.69499 | v_loss: 1.31257 v_acc: 0.73112 |  iteration: 18123 teacher: 1 stage: sketch lr: 0.000328
batch 721 loss: 1.36278 acc: 0.70508 | v_loss: 1.54662 v_acc: 0.71452 |  iteration: 18124 teacher: 0 stage: sketch lr: 0.000328
batch 722 loss: 1.44996 acc: 0.70801 | v_loss: 1.28579 v_acc: 0.69759 |  iteration: 18125 teacher: 0 sta

batch 780 loss: 1.44287 acc: 0.70573 | v_loss: 1.36611 v_acc: 0.71615 |  iteration: 18183 teacher: 1 stage: sketch lr: 0.000328
batch 781 loss: 1.35471 acc: 0.70475 | v_loss: 1.41567 v_acc: 0.70475 |  iteration: 18184 teacher: 1 stage: sketch lr: 0.000328
batch 782 loss: 1.45528 acc: 0.69661 | v_loss: 1.41747 v_acc: 0.70540 |  iteration: 18185 teacher: 0 stage: sketch lr: 0.000328
batch 783 loss: 1.45273 acc: 0.70508 | v_loss: 1.22701 v_acc: 0.71582 |  iteration: 18186 teacher: 0 stage: sketch lr: 0.000328
batch 784 loss: 1.52521 acc: 0.69043 | v_loss: 1.38294 v_acc: 0.72754 |  iteration: 18187 teacher: 0 stage: sketch lr: 0.000328
batch 785 loss: 1.44389 acc: 0.70247 | v_loss: 1.46472 v_acc: 0.69792 |  iteration: 18188 teacher: 0 stage: sketch lr: 0.000328
batch 786 loss: 1.37340 acc: 0.71647 | v_loss: 1.40846 v_acc: 0.72070 |  iteration: 18189 teacher: 0 stage: sketch lr: 0.000328
batch 787 loss: 1.54065 acc: 0.69629 | v_loss: 1.24510 v_acc: 0.72201 |  iteration: 18190 teacher: 0 sta

batch 845 loss: 1.39698 acc: 0.70931 | v_loss: 1.37169 v_acc: 0.69368 |  iteration: 18248 teacher: 1 stage: sketch lr: 0.000327
batch 846 loss: 1.45562 acc: 0.70215 | v_loss: 1.29399 v_acc: 0.71777 |  iteration: 18249 teacher: 1 stage: sketch lr: 0.000327
batch 847 loss: 1.58284 acc: 0.69889 | v_loss: 1.35840 v_acc: 0.69629 |  iteration: 18250 teacher: 1 stage: sketch lr: 0.000327
batch 848 loss: 1.40923 acc: 0.72103 | v_loss: 1.46487 v_acc: 0.71517 |  iteration: 18251 teacher: 1 stage: sketch lr: 0.000327
batch 849 loss: 1.35584 acc: 0.71126 | v_loss: 1.31584 v_acc: 0.72689 |  iteration: 18252 teacher: 0 stage: sketch lr: 0.000327
batch 850 loss: 1.34547 acc: 0.71582 | v_loss: 1.44444 v_acc: 0.70443 |  iteration: 18253 teacher: 0 stage: sketch lr: 0.000327
batch 851 loss: 1.53504 acc: 0.68815 | v_loss: 1.36922 v_acc: 0.70150 |  iteration: 18254 teacher: 0 stage: sketch lr: 0.000327
batch 852 loss: 1.43043 acc: 0.70736 | v_loss: 1.32779 v_acc: 0.70866 |  iteration: 18255 teacher: 0 sta

batch 910 loss: 1.41786 acc: 0.70671 | v_loss: 1.42235 v_acc: 0.70410 |  iteration: 18313 teacher: 1 stage: sketch lr: 0.000327
batch 911 loss: 1.47770 acc: 0.70020 | v_loss: 1.41302 v_acc: 0.70768 |  iteration: 18314 teacher: 1 stage: sketch lr: 0.000327
batch 912 loss: 1.40593 acc: 0.70866 | v_loss: 1.39051 v_acc: 0.70475 |  iteration: 18315 teacher: 1 stage: sketch lr: 0.000327
batch 913 loss: 1.45690 acc: 0.69499 | v_loss: 1.25681 v_acc: 0.71354 |  iteration: 18316 teacher: 0 stage: sketch lr: 0.000327
batch 914 loss: 1.41853 acc: 0.70280 | v_loss: 1.32677 v_acc: 0.72461 |  iteration: 18317 teacher: 0 stage: sketch lr: 0.000327
batch 915 loss: 1.44460 acc: 0.70768 | v_loss: 1.18536 v_acc: 0.71517 |  iteration: 18318 teacher: 1 stage: sketch lr: 0.000327
batch 916 loss: 1.44863 acc: 0.70182 | v_loss: 1.34057 v_acc: 0.70703 |  iteration: 18319 teacher: 1 stage: sketch lr: 0.000327
batch 917 loss: 1.58992 acc: 0.69271 | v_loss: 1.50344 v_acc: 0.69987 |  iteration: 18320 teacher: 1 sta

batch 975 loss: 1.45940 acc: 0.70247 | v_loss: 1.43634 v_acc: 0.71647 |  iteration: 18378 teacher: 0 stage: sketch lr: 0.000326
batch 976 loss: 1.50544 acc: 0.69434 | v_loss: 1.67467 v_acc: 0.69303 |  iteration: 18379 teacher: 0 stage: sketch lr: 0.000326
batch 977 loss: 1.44399 acc: 0.69401 | v_loss: 1.52391 v_acc: 0.70117 |  iteration: 18380 teacher: 0 stage: sketch lr: 0.000326
batch 978 loss: 1.45621 acc: 0.70378 | v_loss: 1.28938 v_acc: 0.72363 |  iteration: 18381 teacher: 0 stage: sketch lr: 0.000326
batch 979 loss: 1.45092 acc: 0.69954 | v_loss: 1.37186 v_acc: 0.70410 |  iteration: 18382 teacher: 0 stage: sketch lr: 0.000326
batch 980 loss: 1.41453 acc: 0.70052 | v_loss: 1.22542 v_acc: 0.71973 |  iteration: 18383 teacher: 0 stage: sketch lr: 0.000326
batch 981 loss: 1.37454 acc: 0.71224 | v_loss: 1.41471 v_acc: 0.70150 |  iteration: 18384 teacher: 1 stage: sketch lr: 0.000326
batch 982 loss: 1.45962 acc: 0.69238 | v_loss: 1.35523 v_acc: 0.71029 |  iteration: 18385 teacher: 0 sta

batch 1039 loss: 1.52128 acc: 0.69954 | v_loss: 1.51986 v_acc: 0.69173 |  iteration: 18442 teacher: 1 stage: sketch lr: 0.000325
batch 1040 loss: 1.43314 acc: 0.70801 | v_loss: 1.37817 v_acc: 0.70020 |  iteration: 18443 teacher: 1 stage: sketch lr: 0.000325
batch 1041 loss: 1.43771 acc: 0.70182 | v_loss: 1.31820 v_acc: 0.70345 |  iteration: 18444 teacher: 1 stage: sketch lr: 0.000325
batch 1042 loss: 1.42746 acc: 0.70475 | v_loss: 1.33998 v_acc: 0.69824 |  iteration: 18445 teacher: 1 stage: sketch lr: 0.000325
batch 1043 loss: 1.34464 acc: 0.70085 | v_loss: 1.33092 v_acc: 0.71549 |  iteration: 18446 teacher: 0 stage: sketch lr: 0.000325
batch 1044 loss: 1.41163 acc: 0.70117 | v_loss: 1.51485 v_acc: 0.69401 |  iteration: 18447 teacher: 1 stage: sketch lr: 0.000325
batch 1045 loss: 1.54400 acc: 0.69694 | v_loss: 1.37520 v_acc: 0.69922 |  iteration: 18448 teacher: 1 stage: sketch lr: 0.000325
batch 1046 loss: 1.42580 acc: 0.71387 | v_loss: 1.35206 v_acc: 0.71322 |  iteration: 18449 teache

batch 1103 loss: 1.33506 acc: 0.70833 | v_loss: 1.25740 v_acc: 0.70410 |  iteration: 18506 teacher: 0 stage: sketch lr: 0.000325
batch 1104 loss: 1.41249 acc: 0.70410 | v_loss: 1.23892 v_acc: 0.73958 |  iteration: 18507 teacher: 1 stage: sketch lr: 0.000325
batch 1105 loss: 1.43363 acc: 0.70638 | v_loss: 1.27244 v_acc: 0.71875 |  iteration: 18508 teacher: 1 stage: sketch lr: 0.000325
batch 1106 loss: 1.42641 acc: 0.70605 | v_loss: 1.34399 v_acc: 0.73242 |  iteration: 18509 teacher: 1 stage: sketch lr: 0.000325
batch 1107 loss: 1.41310 acc: 0.70247 | v_loss: 1.26358 v_acc: 0.71940 |  iteration: 18510 teacher: 0 stage: sketch lr: 0.000325
batch 1108 loss: 1.48960 acc: 0.70182 | v_loss: 1.30722 v_acc: 0.72168 |  iteration: 18511 teacher: 1 stage: sketch lr: 0.000325
batch 1109 loss: 1.43868 acc: 0.69857 | v_loss: 1.42153 v_acc: 0.71289 |  iteration: 18512 teacher: 0 stage: sketch lr: 0.000325
batch 1110 loss: 1.42967 acc: 0.70964 | v_loss: 1.39597 v_acc: 0.72070 |  iteration: 18513 teache

batch 1167 loss: 1.57963 acc: 0.68522 | v_loss: 1.36975 v_acc: 0.70410 |  iteration: 18570 teacher: 0 stage: sketch lr: 0.000324
batch 1168 loss: 1.32566 acc: 0.70573 | v_loss: 1.27890 v_acc: 0.72233 |  iteration: 18571 teacher: 0 stage: sketch lr: 0.000324
batch 1169 loss: 1.49738 acc: 0.68066 | v_loss: 1.30066 v_acc: 0.72135 |  iteration: 18572 teacher: 0 stage: sketch lr: 0.000324
batch 1170 loss: 1.48222 acc: 0.70768 | v_loss: 1.46326 v_acc: 0.69336 |  iteration: 18573 teacher: 1 stage: sketch lr: 0.000324
batch 1171 loss: 1.48032 acc: 0.69303 | v_loss: 1.34573 v_acc: 0.71387 |  iteration: 18574 teacher: 1 stage: sketch lr: 0.000324
batch 1172 loss: 1.38524 acc: 0.70345 | v_loss: 1.28330 v_acc: 0.71615 |  iteration: 18575 teacher: 0 stage: sketch lr: 0.000324
batch 1173 loss: 1.35661 acc: 0.70866 | v_loss: 1.27653 v_acc: 0.71615 |  iteration: 18576 teacher: 1 stage: sketch lr: 0.000324
batch 1174 loss: 1.41596 acc: 0.70280 | v_loss: 1.41029 v_acc: 0.70605 |  iteration: 18577 teache

batch 1231 loss: 1.42844 acc: 0.69922 | v_loss: 1.43689 v_acc: 0.69954 |  iteration: 18634 teacher: 0 stage: sketch lr: 0.000324
batch 1232 loss: 1.33062 acc: 0.71680 | v_loss: 1.42906 v_acc: 0.71289 |  iteration: 18635 teacher: 1 stage: sketch lr: 0.000324
batch 1233 loss: 1.40239 acc: 0.70768 | v_loss: 1.28215 v_acc: 0.71875 |  iteration: 18636 teacher: 0 stage: sketch lr: 0.000324
batch 1234 loss: 1.42153 acc: 0.70052 | v_loss: 1.25345 v_acc: 0.72754 |  iteration: 18637 teacher: 0 stage: sketch lr: 0.000324
batch 1235 loss: 1.38054 acc: 0.70182 | v_loss: 1.37843 v_acc: 0.71940 |  iteration: 18638 teacher: 1 stage: sketch lr: 0.000324
batch 1236 loss: 1.42447 acc: 0.69922 | v_loss: 1.42493 v_acc: 0.70345 |  iteration: 18639 teacher: 0 stage: sketch lr: 0.000324
batch 1237 loss: 1.48460 acc: 0.69401 | v_loss: 1.43036 v_acc: 0.70410 |  iteration: 18640 teacher: 1 stage: sketch lr: 0.000324
batch 1238 loss: 1.43413 acc: 0.69727 | v_loss: 1.21858 v_acc: 0.71712 |  iteration: 18641 teache