In [1]:

import torch
import torch.optim as optim
import torch.nn as nn
from parse_data import get_data, get_modified_values, get_binary_values, make_data_scalar
import numpy as np
import random
from data_gen import Datagen
from recognition import Recognition
from generator import Generator
from evaluation import evaluate_model, bin_plot
from time_recognition import TimeRecognition

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#device=None
print("Using device: ", device)

import torch
print(torch.__version__)
 

Using device:  cuda
1.12.0+cu116


In [2]:
gen = Datagen(device)

x, y, x_1 = gen.get_generated_data(seq_len=2)

print("x", x[0])
print("y", y[0])
print("x_1", x_1[0])

x tensor([[0.],
        [0.]], device='cuda:0')
y tensor([1.], device='cuda:0')
x_1 tensor([[0.],
        [1.]], device='cuda:0')


In [3]:
import random

# Hyperparameters
sequence_length = [2*i for i in range(4,16)] # 2-20 increments of two
hidden_layers = [1,2] # 1 and 2
hidden_1 = [2**i for i in range(2,7)] # 2^4 to 2^9
hidden_2 =[2**i for i in range(5,10)] # 2^2 to 2^5
variance = [0.001, 0.01, 0.005, 0.05]
lr = [0.001, 0.01, 0.1, 0.005] # stop at 0.005
data_probability = [i/5 for i in range(1,6)]
regularization = [1/i for i in range(1,10)]
for i in range(3):
    regularization.append(0)

epochs = 500
optimizer = [optim.Adam, optim.SGD]

options = []

for seq_len in sequence_length:
    for layers in hidden_layers:
        for h1 in hidden_1:
            for h2 in hidden_2:
                for l in lr:
                    for v in variance:
                        for p in data_probability:
                            for r in regularization:
                                entry = {}
                                entry["seq_len"] = seq_len
                                entry["layers"] = layers
                                entry["latent"] = h1
                                entry["hidden"] = h2
                                entry["l"] = l
                                entry["variance"] = v
                                entry["data_prob"] = p
                                entry["regularization"] = r
                                options.append(entry)
                
                                         
random.shuffle(options)    


In [None]:


import torch.utils.data as data
from itertools import chain
import torch.nn.functional as F

def loss(x, x_hat, mean, R, s, x_1,reg,  device=None, seq_len=1):
    
    mse = nn.MSELoss().to(device)
    l = F.binary_cross_entropy(x_hat, x, reduction='sum')
    amount = mean[0].size()[0]*mean[0].size()[1]
    for m, r in zip(mean, R):
        
        C = r @ r.transpose(-2,-1) + 1e-6
        det = C.det() + 1e-6 
        l += 0.5 * torch.sum(m.pow(2).sum(-1) 
                             + C.diagonal(dim1=-2,dim2=-1).sum(-1)
                            -det.log()  -1)/amount

    count = len(s)*2
    for a, b in zip(s, x_1):
        l += reg*mse(a[0], b[0])/count
        l += reg*mse(a[1], b[1])/count
    
    #print(l, F.binary_cross_entropy(x_hat, x, reduction='sum'))
    return l 

best_model = None
best_score = 10000000000000000
batch_size = 10
best_history= [0,0,0,0,0,0]
for entry in options:

    x_d, y_d, x_1_d = gen.get_generated_data(entry["seq_len"], entry["variance"], entry["data_prob"])
    x_t, y_t, x_t_1 = gen.get_true_data(entry["seq_len"])
    x_val, y_val, x_val_1 = gen.get_test_data(entry["seq_len"])


    model_t = TimeRecognition(input_dim=x_d[0].size()[1],
                              hidden_size=entry["hidden"],
                              seq_len=entry["seq_len"],
                              layers=entry["layers"],
                             device=device)

    model_g = Generator(hidden_size=entry["hidden"],
                        latent_dim=entry["latent"],
                        output_dim=y_d[0].size()[0],
                        layers=entry["layers"],
                        seq_len=entry["seq_len"],
                        device=device)
    model_r = Recognition(input_dim=x_d[0].size()[1],
                          latent_dim=entry["latent"],
                          layers=entry["layers"],
                          device=device)

    loader = data.DataLoader(data.TensorDataset(x_d, y_d, x_1_d), batch_size=batch_size, shuffle=True)
    optimizer = optim.Adam(chain(model_r.parameters(), model_g.parameters(), model_t.parameters()), lr=0.01)
    #optimizer = optim.Adam(model_r.parameters())
    history = []
    bce = nn.BCELoss().to(device)
    for e in range(epochs):
        model_g.train()
        model_r.train()
        model_t.train()


        for x, y, x_1 in loader:

            x.to(device)
            y.to(device)
            if x.size()[0] < batch_size:
                continue
            if random.random() < 0.5:
                continue

            t = model_t(x)
            t_1 = model_t(x_1)
            model_g.make_internal_state()
            rec = model_r(x_1)
            model_g.set_xi(rec[-1])
            model_g.set_internal_state(t)
            b, s = model_g()

            l = loss(y, b, rec[0], rec[1], s, t_1, entry["regularization"], device, entry["seq_len"])
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
                        

        
        if e % 10 != 0:
            continue
        
        count = 0
        sum_loss = [0, 0]
        for j in range(2):
            for x, y, x_1 in loader:
                model_g.eval()
                model_t.eval()
                model_r.eval()
                model_g.make_internal_state()
                model_g.make_xi()
                with torch.no_grad():
                    model_g.make_internal_state()
                    rec = model_r(x_1)
                    t = model_t(x)
                    t_1 = model_t(x_1)
                    model_g.set_internal_state(t)
                    model_g.set_xi(rec[-1])
                    b,s = model_g()
                    l = loss(y, b, rec[0], rec[1],s,t_1,entry["regularization"], device, entry["seq_len"])
                    res = []
                    
                    sum_loss[j] += l.item()
                    count += 1
                    
        
        
        sum_loss[0] /= count
     
        
        history.append([e, sum_loss[0], sum_loss[1]])
        print(history[-1])

        if len(history) > 15:
            #if no real improvements are being done stop the training. 
            # but keep doing the training if the results without correctly feeding values get better
            if abs(history[-15][1] - history[-1][1]) < 0.0001:
                break
        
    
    if history[-1][1] < best_score:
        print("New best model:\nNew loss: ", history[-1], "\nOld loss:", best_history[-1], "\nHistory:" , history[-10:])
        best_model = model_g
        best_history = history
        best_score = history[-1][1]
        best_config = entry
        with torch.no_grad():
            evaluate_model(best_model,model_r, model_t, x_t, y_t, x_t_1,x_val,y_val, x_val_1, entry)
    else:
        with torch.no_grad():
            evaluate_model(model_g,model_r, model_t, x_t, y_t, x_t_1,x_val,y_val, x_val_1, entry)
        print("Old model still stands:\nCurrent loss: ", history[-1], "\nBest loss:", best_history[-1])
    

[10, 7.183002580239318, 11020.6673746109]
[20, 7.238397690397019, 11085.505493164062]
[30, 7.219948619215047, 11017.67986869812]
[40, 7.221480355250306, 11082.172514915466]
[50, 7.3234477703004845, 11196.203893661499]
[60, 7.240130959229432, 11097.616790771484]
[70, 7.309230597772424, 11183.62596321106]
[80, 7.218655926751717, 11083.661992073059]
[90, 7.210942463214964, 11062.433305740356]
[100, 7.194042856948494, 11048.292337417603]
[110, 7.255224343690798, 11142.379161834717]
[120, 7.205919951122673, 11033.069692611694]
[130, 7.210334184586846, 11050.64178943634]
[140, 7.190562615506643, 11030.767473220825]
[150, 7.201941186391965, 11058.502060890198]
[160, 7.200841938546681, 11040.048259735107]
[170, 7.187624763882192, 11044.901119232178]
[180, 7.220659988667261, 11075.382726669312]
[190, 7.174888125287646, 11023.61337852478]
[200, 7.204359663995688, 11037.897086143494]
[210, 7.23074166395023, 11054.731225013733]
[220, 7.224494919141963, 11070.69748878479]
[230, 7.197062110776378, 1

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

[0, 17.604279864239007, 26960.11173248291]
[10, 17.522728656041714, 26844.471237182617]
[20, 17.614191469884727, 26953.56702041626]
[30, 17.685351209914405, 27104.060871124268]
[40, 17.51427579237338, 26814.0352973938]
[50, 17.512642895272755, 26846.00830078125]
[60, 17.505205278919508, 26793.768913269043]
[70, 17.63816269757542, 27071.89623260498]
[80, 17.558727451155143, 26881.279567718506]
[90, 17.481687677746965, 26785.9001121521]
[100, 17.534793681948987, 26887.38395690918]
[110, 17.481349740887435, 26796.466876983643]
[120, 17.51519195467, 26821.180751800537]
[130, 17.48670266751521, 26782.236572265625]
[140, 17.490703551638532, 26770.06491470337]
[150, 17.634344701044864, 27018.653163909912]
[160, 17.50828939193845, 26815.9847946167]
[170, 17.563072772312413, 26889.40966796875]
[180, 17.564934596069175, 26908.305179595947]
[190, 17.54746494094012, 26902.497554779053]
[200, 17.720177371570397, 27148.79664993286]
[210, 17.52619188572657, 26808.16402053833]
[220, 17.549962237983085

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

[0, 33.67238836064351, 51574.07107925415]
[10, 33.66680433606977, 51532.17106628418]
[20, 33.494712535768514, 51309.99166870117]
[30, 33.53329390583088, 51383.38747406006]
[40, 33.53811829270644, 51364.0046081543]
[50, 33.47492829564346, 51275.213539123535]
[60, 33.478242819365256, 51283.14453125]
[70, 33.56747055551713, 51423.914222717285]
[80, 33.65122753272791, 51539.416107177734]
[90, 33.524434878994214, 51351.66544342041]
[100, 33.579280599290335, 51459.76747131348]
[110, 33.78813801857572, 51733.52870178223]
[120, 33.90160846959206, 51946.684478759766]
[130, 33.502385067255005, 51283.49284362793]
[140, 33.46805218928165, 51340.555252075195]
[150, 33.47966686179059, 51282.535652160645]
[160, 33.482580020595776, 51293.164558410645]
[170, 36.90700383597192, 56491.416107177734]
[180, 33.58926480743966, 51469.38752746582]
[190, 33.43674926658212, 51267.19896697998]
[200, 33.53762100259567, 51390.90029144287]
[210, 33.61037301145708, 51490.11267089844]
[220, 33.597045559795966, 51475.1

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



Old model still stands:
Current loss:  [490, 49.256456673923424, 75455.51912689209] 
Best loss: [490, 4.226098413878259, 6472.057052612305]
[0, 7.275757426070171, 11130.67308807373]
[10, 7.1993520427932935, 11043.44072151184]
[20, 7.207502884902158, 11051.234474182129]
[30, 7.2387761310248715, 11088.611001968384]
[40, 7.271040075441564, 11118.368326187134]
[50, 7.2190113441128645, 11066.31300163269]
[60, 7.30971299014577, 11196.362617492676]
[70, 7.263813844237564, 11115.396579742432]
[80, 7.27490187313787, 11136.065011024475]
[90, 7.241750641840245, 11077.95290184021]
[100, 7.214110799620108, 11036.191459655762]
[110, 7.232624271831064, 11092.92087650299]
[120, 7.21378690393413, 11051.90025806427]
[130, 7.22302996396396, 11087.994876861572]
[140, 7.249613509788214, 11117.266543388367]
[150, 7.328800113020615, 11207.074842453003]
[160, 7.226442631479966, 11083.958872795105]
[170, 7.221445549250271, 11051.175695419312]
[180, 7.190019505453483, 11010.09848022461]
[190, 7.183610332230052,

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



[200, 3.401805601761173, 5203.341804981232]
[420, 3.407222319520796, 5216.738505363464]
[430, 3.4106439103034396, 5247.999827384949]
[440, 3.4291389309083824, 5255.641390800476]
[450, 3.417591197683668, 5239.246494054794]
[460, 3.4149247491951087, 5229.360130786896]
[470, 3.481042284878365, 5310.966951847076]
[480, 3.4544589650226323, 5271.054623126984]
[490, 3.4389652035566285, 5229.957242012024]
Initial
Initial_done
HERE
[398.09356689453125, 397.89422607421875, 401.2530822753906, 392.42742919921875, 396.7616271972656, 395.8659362792969, 393.96734619140625, 400.3830261230469, 401.7217102050781, 397.0516052246094, 397.4393005371094]


Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 3.4389652035566285, 5229.957242012024] 
Best loss: [490, 3.4346131901828176, 5270.837095975876]
[0, 33.661930592190814, 51610.8994140625]
[10, 33.4567862478311, 51243.66011047363]
[20, 33.49014973951693, 51303.557750701904]
[30, 33.449456712907036, 51256.024379730225]
[40, 33.474907772970575, 51276.394134521484]
[50, 33.4621554192924, 51274.09085083008]
[60, 33.58265103372519, 51363.44013977051]
[70, 33.434463747487676, 51262.02149963379]
[80, 33.453144031156135, 51274.48654174805]
[90, 33.42276988515032, 51206.26738739014]
[100, 33.425900974721884, 51206.347595214844]
[110, 33.559545277926695, 51437.58192443848]
[120, 33.448381653031234, 51242.37387084961]
[130, 33.53874873865989, 51409.44534301758]
[140, 33.41771915625032, 51195.96642303467]
[150, 33.45544030211924, 51242.881896972656]
[160, 33.430387153326684, 51223.31839752197]
[170, 33.472403135374385, 51269.764656066895]
[180, 33.44655181844925, 51222.17981338501]
[190, 33.456754828868

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 33.56762925638542, 51461.14395904541] 
Best loss: [490, 3.4346131901828176, 5270.837095975876]
[0, 7.278001597282469, 11120.783648490906]
[10, 7.199839443201807, 11018.757566452026]
[20, 7.2288317711484025, 11078.588416099548]
[30, 7.182316522996668, 11001.166117668152]
[40, 7.1886080313600385, 11014.507711410522]
[50, 7.204594963835674, 11031.110382080078]
[60, 7.191102992772434, 11030.982034683228]
[70, 7.186236298115384, 10992.453724861145]
[80, 7.184765374380365, 11000.323481559753]
[90, 7.177994423061999, 10996.352637290955]
[100, 7.262116251664124, 11153.938194274902]
[110, 7.19538555008313, 11028.715054512024]
[120, 7.264204625361271, 11139.378890037537]
[130, 7.176598348443253, 10993.307138442993]
[140, 7.169260656864774, 10995.870774269104]
[150, 7.193048993227687, 11015.029750823975]
[160, 7.276606009460927, 11143.693224906921]
[170, 7.183141343587373, 11011.334886550903]
[180, 7.183279353707017, 11010.280786514282]
[190, 7.2015741

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 17.384920697921874, 26605.63046646118] 
Best loss: [490, 3.4346131901828176, 5270.837095975876]
[0, 3.508738078585493, 5375.021528482437]
[10, 3.4916983609411174, 5358.049523830414]
[20, 3.4734353437124903, 5329.360245704651]
[30, 3.5025895335966863, 5343.096829414368]
[40, 3.4543858665399076, 5281.680019378662]
[50, 3.4615740623548823, 5301.815705299377]
[60, 3.4578010702880184, 5278.266487598419]
[70, 3.4570920926472537, 5272.334177017212]
[80, 3.450124640066381, 5250.033550739288]
[90, 3.439220229577147, 5255.643657684326]
[100, 3.478085707124157, 5296.821902275085]
[110, 3.431201526919482, 5282.814143657684]
[120, 3.436574636800482, 5274.31067943573]
[130, 3.4331472586091443, 5264.977505683899]
[140, 3.44150670327966, 5279.468070030212]
[150, 3.4462496064039185, 5248.151501655579]
[160, 3.4235903553800857, 5258.249281406403]
[170, 3.449013471292142, 5284.039057254791]
[180, 3.425545647001142, 5268.213783025742]
[190, 3.448009135050811, 5

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



Old model still stands:
Current loss:  [490, 25.107360635662825, 38404.915031433105] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 13.286426088517391, 20345.34938812256]
[10, 13.21482236702822, 20253.366863250732]
[20, 13.216676024793022, 20237.63084793091]
[30, 13.211206936649491, 20251.79935646057]
[40, 13.187468887309182, 20173.46528816223]
[50, 13.21136819973938, 20249.22348022461]
[60, 13.17281151689375, 20195.491891860962]
[70, 13.240736611518162, 20296.68364715576]
[80, 13.217643400396442, 20242.558502197266]
[90, 13.181030287132563, 20173.16170501709]
[100, 13.169064513074511, 20172.34268951416]
[110, 13.170649323077487, 20156.20443534851]
[120, 13.19862385767247, 20213.815160751343]
[130, 13.17467941129799, 20170.144119262695]
[140, 13.129238134887137, 20138.42460823059]
[150, 13.126227920422666, 20113.594032287598]
[160, 13.114618051145468, 20093.722158432007]
[170, 13.115860104872104, 20100.09754562378]
[180, 13.100475083443266, 20085.236307144165]
[190, 13.110

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [400, 13.11384631074751, 20102.749324798584] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 49.4522913193267, 75742.25942230225]
[10, 49.226641328776786, 75376.24632263184]
[20, 49.25679509135513, 75459.54356384277]
[30, 49.282392297650134, 75551.38948822021]
[40, 49.234565052600196, 75445.56902313232]
[50, 49.17374124352677, 75343.2914352417]
[60, 49.25689101654929, 75447.03386688232]
[70, 49.185260399203386, 75336.24826812744]
[80, 49.38122486881426, 75615.69680786133]
[90, 49.195958533424, 75393.87897491455]
[100, 49.18916280462599, 75374.52254486084]
[110, 49.18637291133871, 75371.36252593994]
[120, 49.32095310519943, 75566.1005783081]
[130, 49.18128058431043, 75346.10108947754]
[140, 49.23355044770801, 75426.9789428711]
[150, 49.212015669899884, 75392.07109832764]
[160, 49.18532623012134, 75371.3278503418]
[170, 49.194700763991236, 75365.39905548096]
[180, 49.19483488595828, 75341.85571289062]
[190, 49.292031599398385, 75595.6858

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 49.17916729680552, 75363.21667480469] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 5.576853890331856, 8533.376480102539]
[10, 5.462396944160561, 8376.8644323349]
[20, 5.430963973463671, 8331.07702255249]
[30, 5.438232436192565, 8334.017078399658]
[40, 5.416779310834003, 8313.919909477234]
[50, 5.448658402221321, 8325.747069358826]
[60, 5.420496682896626, 8313.59488105774]
[70, 5.420703713016161, 8314.893261909485]
[80, 5.421014637610931, 8300.970283985138]
[90, 5.418846015830575, 8296.66567993164]
[100, 5.421999919507896, 8293.605573654175]
[110, 5.467008001188074, 8380.472021102905]
[120, 5.442111152892947, 8339.669898033142]
[130, 5.4448833963578425, 8322.839463233948]
[140, 5.412052989317294, 8287.084658622742]
[150, 5.406489521654094, 8292.019771575928]
[160, 5.444685073807719, 8357.70651626587]
[170, 5.423611096240211, 8289.963980674744]
[180, 5.387039922236151, 8263.234637260437]
[190, 5.346713138311401, 8209.10849761962

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 5.357209406073348, 8209.026301383972] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 13.254106107019568, 20316.56840133667]
[10, 13.175143526988615, 20176.847911834717]
[20, 13.174040027449088, 20182.465867996216]
[30, 13.175105799582857, 20202.51490020752]
[40, 13.188728953777343, 20230.370050430298]
[50, 13.172937678294767, 20164.26165008545]
[60, 13.160045285137764, 20184.44456100464]
[70, 13.16716339071488, 20183.52604484558]
[80, 13.174037212491347, 20182.185024261475]
[90, 13.200798570020392, 20210.99322128296]
[100, 13.164351424722696, 20180.684776306152]
[110, 13.18346011358515, 20203.823007583618]
[120, 13.168085631129014, 20173.34684562683]
[130, 13.173473752821083, 20162.15708732605]
[140, 13.152646908872121, 20149.074838638306]
[150, 13.17126834485923, 20177.612657546997]
[160, 13.120904069656492, 20093.86513900757]
[170, 13.103012114841073, 20075.832777023315]
[180, 13.150828386411343, 20127.511680603027]
[190, 13.1

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 13.101250277486855, 20081.339889526367] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 9.472414219659552, 14512.198392868042]
[10, 9.446846635783622, 14460.189447402954]
[20, 9.435236049383178, 14419.276723861694]
[30, 9.439801360546141, 14453.093873977661]
[40, 9.434892888480004, 14453.71636390686]
[50, 9.434604369629145, 14448.737873077393]
[60, 9.42137893497477, 14433.299434661865]
[70, 9.418078754962892, 14430.625560760498]
[80, 9.412076917703097, 14428.502361297607]
[90, 9.419222308823707, 14428.218246459961]
[100, 9.427651402844461, 14435.354219436646]
[110, 9.416538533591726, 14429.187133789062]
[120, 9.45054032846157, 14463.243263244629]
[130, 9.406845392819795, 14385.74591255188]
[140, 9.38461034042717, 14376.11245727539]
[150, 9.388167012764953, 14376.905095100403]
[160, 9.389636151784394, 14389.235824584961]
[170, 9.451917969526884, 14456.111946105957]
[180, 9.391486946036236, 14400.769720077515]
[190, 9.3919412734925

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 9.440998335108745, 14478.963287353516] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 33.669510759199255, 51580.95546722412]
[10, 33.48929242614666, 51313.98831939697]
[20, 33.475733171866395, 51275.49094390869]
[30, 33.5213425539181, 51326.58209991455]
[40, 33.50374813926438, 51320.30082702637]
[50, 33.470935861373384, 51278.60646057129]
[60, 33.59118009920842, 51445.223892211914]
[70, 33.58544108139317, 51485.7409286499]
[80, 33.55191003186896, 51360.22869873047]
[90, 33.634456519980965, 51509.43845367432]
[100, 33.570445810223376, 51421.110664367676]
[110, 33.481691987956474, 51311.68613433838]
[120, 33.80297773376146, 51780.673820495605]
[130, 34.65049993151473, 53102.06108856201]
[140, 33.469812980831136, 51312.69757080078]
[150, 33.493789010508564, 51304.99011993408]
[160, 33.46936511246405, 51260.44275665283]
[170, 33.459560976949746, 51269.4363861084]
[180, 33.477318347901026, 51312.6854095459]
[190, 33.52812696810491, 5

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 33.45729216334092, 51262.63710021973] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 49.30857352114844, 75513.25662231445]
[10, 49.24696176792872, 75444.34104919434]
[20, 49.434399517646966, 75691.99147033691]
[30, 49.22072425523564, 75386.5323562622]
[40, 49.22510606750807, 75409.43608856201]
[50, 49.28007460014005, 75460.9105682373]
[60, 49.23736951743343, 75456.89588928223]
[70, 49.18659465680234, 75438.87919616699]
[80, 49.20330613835985, 75393.94900512695]
[90, 49.254046009352564, 75464.7205734253]
[100, 49.35525175049784, 75711.16534423828]
[110, 49.32996542708992, 75574.68386077881]
[120, 49.46467895607413, 75755.44689178467]
[130, 49.19796577134892, 75384.16450500488]
[140, 49.18024812603744, 75361.46822357178]
[150, 49.1979875290674, 75395.42686462402]
[160, 49.19700331227275, 75360.45545959473]
[170, 49.287657461340686, 75520.88654327393]
[180, 49.19536081119866, 75396.8027420044]
[190, 49.2002733494532, 75403.48440551

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 50.44747762953955, 77267.82622528076] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 34.190886455167366, 52328.67995452881]
[10, 33.74739015942765, 51700.199630737305]
[20, 33.92279153828832, 52021.39100646973]
[30, 33.882463609580896, 51871.04438018799]
[40, 33.89355946334162, 51912.24939727783]
[50, 34.009333936726144, 52133.07618713379]
[60, 33.9042976269834, 51940.28365325928]
[70, 33.80043311654432, 51761.62113952637]
[80, 33.88136626161421, 51868.84475708008]
[90, 34.28631053954441, 52548.23434448242]
[100, 33.76904293388984, 51748.265785217285]
[110, 35.42437508585559, 54280.94625854492]
[120, 34.44988458897364, 52765.8384475708]
[130, 37.46964090611231, 57357.08898162842]
[140, 34.208610260766726, 52434.99607849121]
[150, 37.20060430183112, 56989.37738800049]
[160, 34.00815359170381, 52070.99378204346]
[170, 37.93228151406071, 58087.525733947754]
[180, 35.39848651338184, 54237.65512084961]
[190, 35.64766799687717, 54593.

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 46.73689868120238, 71588.62839508057] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 7.302849653806761, 11196.67621421814]
[10, 7.2579986613350815, 11120.63203716278]
[20, 7.273181514391388, 11109.407921791077]
[30, 7.311979451938958, 11202.152534484863]
[40, 7.404639285787279, 11341.71450805664]
[50, 7.252438174837875, 11109.471855163574]
[60, 7.690214942080545, 11788.290479660034]
[70, 7.271362680056699, 11158.934290885925]
[80, 7.398378712701424, 11347.756647109985]
[90, 7.3041512567754205, 11212.33922958374]
[100, 7.218385050875711, 11037.50013923645]
[110, 7.207761768883887, 11020.95011806488]
[120, 7.204647259674869, 11028.687129020691]
[130, 7.324585600247894, 11210.527710914612]
[140, 7.178923107625299, 11010.21016216278]
[150, 7.256323771439395, 11081.375252723694]
[160, 7.179396221594151, 11020.38532447815]
[170, 7.197245099214599, 11028.707592964172]
[180, 7.214706201154943, 11049.380796432495]
[190, 7.276359361394578

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 7.130076455696445, 10934.104058265686] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 17.665463788702343, 27067.213165283203]
[10, 17.46121389374098, 26757.722259521484]
[20, 17.450649373525117, 26767.30059814453]
[30, 17.50921434646487, 26823.417598724365]
[40, 17.5140504812136, 26822.021785736084]
[50, 17.487816521766604, 26783.468376159668]
[60, 17.456540319378007, 26713.951141357422]
[70, 17.471113092905547, 26754.936363220215]
[80, 17.48783398670565, 26774.81071472168]
[90, 17.48861862411698, 26796.97339630127]
[100, 17.46274834077601, 26770.40079689026]
[110, 17.54224097946605, 26841.399726867676]
[120, 17.472888465961027, 26759.00693511963]
[130, 17.461812828601808, 26723.227252960205]
[140, 17.529509711203314, 26862.29682159424]
[150, 17.545984522169626, 26856.703773498535]
[160, 17.496852663104903, 26807.31955718994]
[170, 17.442062943162245, 26746.057483673096]
[180, 17.93677485393793, 27465.86695098877]
[190, 17.48972

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 17.85765530857654, 27368.406490325928] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 4.2999580488503755, 6584.812817573547]
[10, 4.275050926768748, 6549.530746936798]
[20, 4.222496127646523, 6475.251743793488]
[30, 4.204824533225973, 6455.034149646759]
[40, 4.253733652378809, 6543.386345386505]
[50, 4.175924807864754, 6380.881892204285]
[60, 4.1967450816077285, 6431.954536437988]
[70, 4.149889888713627, 6336.921627998352]
[80, 4.11988083065023, 6334.348777770996]
[90, 4.123015122687536, 6331.347191333771]
[100, 4.184605170790271, 6414.629984378815]
[110, 4.146477822535343, 6375.565459251404]
[120, 4.163705633141042, 6364.852969169617]
[130, 4.144189883149946, 6328.031480789185]
[140, 4.157014421321082, 6382.878262042999]
[150, 4.118371792623953, 6309.184445858002]
[160, 4.132056783135815, 6362.09065246582]
[170, 4.137729031610116, 6352.4149651527405]
[180, 4.144011098162001, 6353.498405456543]
[190, 4.138259643674208, 6340.6078

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 4.128315205362385, 6309.602834224701] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 5.515414803208633, 8439.852056503296]
[10, 5.437595491932204, 8337.100004196167]
[20, 5.431676340476651, 8338.090605735779]
[30, 5.494002822173793, 8414.313876152039]
[40, 5.454340618210735, 8342.9043674469]
[50, 5.426621139516407, 8294.444808006287]
[60, 5.4223873447189135, 8310.156275749207]
[70, 5.422409532586837, 8296.061769008636]
[80, 5.464932406851268, 8374.606194972992]
[90, 5.3835159294287775, 8247.174253463745]
[100, 5.424719427646607, 8314.34372806549]
[110, 5.361279016375231, 8195.852132797241]
[120, 5.359468975515341, 8203.444749832153]
[130, 5.3464990468311555, 8201.313519954681]
[140, 5.363776929695986, 8218.512295246124]
[150, 5.369401635762606, 8246.405539512634]
[160, 5.388722288390675, 8257.826979160309]
[170, 5.380527752809051, 8232.834440231323]
[180, 5.351333814874953, 8193.44075345993]
[190, 5.359508510980532, 8213.9768018

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 5.354591641662638, 8215.789155960083] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 5.5391001371428485, 8485.313683509827]
[10, 5.593795064223964, 8565.76844215393]
[20, 5.455064184671905, 8381.49024105072]
[30, 5.528175573124898, 8459.742942810059]
[40, 5.472977514989071, 8410.75651550293]
[50, 5.458078688803292, 8370.149277687073]
[60, 5.46981732092078, 8378.960528373718]
[70, 5.474670585079542, 8396.924465179443]
[80, 5.512229065359728, 8435.904359817505]
[90, 5.515073591984283, 8440.080599784851]
[100, 5.454713143505565, 8367.061190605164]
[110, 5.512444748268426, 8423.836050987244]
[120, 5.489924778826243, 8400.927949905396]
[130, 5.478111689769257, 8392.705688476562]
[140, 5.480490993893178, 8378.051859855652]
[150, 5.498861500239559, 8401.9618434906]
[160, 5.465457409542473, 8377.885842323303]
[170, 5.4844337630209665, 8374.637853622437]
[180, 5.474497318267822, 8405.89793586731]
[190, 5.482708441682024, 8384.90248489379

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 5.493930093302117, 8458.983235359192] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 9.527253048849479, 14592.173082351685]
[10, 9.49821914518471, 14550.40175819397]
[20, 9.468824997894446, 14493.875885009766]
[30, 9.453562532330306, 14488.73226928711]
[40, 9.460750505133646, 14476.524908065796]
[50, 9.45478806607717, 14509.970539093018]
[60, 9.48009170377846, 14540.71837425232]
[70, 9.424053254388953, 14460.329542160034]
[80, 9.438553386817713, 14461.584688186646]
[90, 9.476581945743012, 14530.364468574524]
[100, 9.463645252795505, 14521.5585231781]
[110, 9.560865190570722, 14667.385032653809]
[120, 9.535511809914293, 14629.654766082764]
[130, 9.459391940044672, 14478.103107452393]
[140, 9.457472041755059, 14483.440265655518]
[150, 9.470247573703139, 14533.472520828247]
[160, 9.424783336898365, 14432.686100006104]
[170, 9.426516460687623, 14457.902856826782]
[180, 9.44258570110829, 14481.287149429321]
[190, 9.445187706860176, 1

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 9.36786704399567, 14359.1249294281] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 7.273635057494161, 11152.021366119385]
[10, 7.239893206105842, 11091.161016464233]
[20, 7.225967997359233, 11077.36517238617]
[30, 7.224442627660288, 11062.18030834198]
[40, 7.2192992429509175, 11029.857808113098]
[50, 7.224167444687288, 11058.879348754883]
[60, 7.314142258297992, 11228.068665504456]
[70, 7.218846951701311, 11040.954937934875]
[80, 7.219155283571846, 11054.270060539246]
[90, 7.221834553128434, 11052.749453544617]
[100, 7.217314596275748, 11053.609718322754]
[110, 7.252704517648363, 11113.33910369873]
[120, 7.206667878297851, 11044.698041915894]
[130, 7.243584698236331, 11085.118740081787]
[140, 7.215091531021477, 11062.201922416687]
[150, 7.249667203146235, 11120.597465515137]
[160, 7.225473138744464, 11066.52648639679]
[170, 7.507253286421455, 11515.149678230286]
[180, 7.226297620693635, 11070.064179420471]
[190, 7.24570238932617

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 7.266036636213099, 11111.304511070251] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 25.48405329801395, 39042.987297058105]
[10, 25.259178268691578, 38693.367206573486]
[20, 25.3409408365155, 38782.67802810669]
[30, 25.26062362312337, 38711.32697677612]
[40, 25.424456384723555, 38954.05257797241]
[50, 25.22917571702763, 38637.661029815674]
[60, 25.23108006270685, 38679.83486175537]
[70, 25.21430955543219, 38606.31111526489]
[80, 25.25876764840308, 38724.21152496338]
[90, 31.692825792974965, 48539.55658721924]
[100, 26.003572224948176, 39853.20846939087]
[110, 25.20314865012704, 38628.65370941162]
[120, 25.212745514613218, 38612.69825363159]
[130, 25.19664027609962, 38608.87417221069]
[140, 25.230761981197187, 38657.62678527832]
[150, 25.243337506725023, 38673.277488708496]
[160, 25.893202846417537, 39720.946548461914]
[170, 25.224089144415395, 38608.71342468262]
[180, 28.71886943837059, 43987.740226745605]
[190, 27.482292404374

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 27.452684412425864, 42087.62232589722] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 49.44027660658714, 75770.13905334473]
[10, 49.245397991050936, 75415.1788482666]
[20, 49.23855662532637, 75419.27812957764]
[30, 49.225421696356634, 75421.30503082275]
[40, 49.19943042087804, 75386.24850463867]
[50, 49.317060231539976, 75543.96272277832]
[60, 49.23784814241972, 75438.40398406982]
[70, 49.18640054050376, 75341.37744140625]
[80, 49.25081580737241, 75461.65323638916]
[90, 49.18262347228845, 75383.23400878906]
[100, 49.19690186423359, 75379.35953521729]
[110, 49.17878526433641, 75333.66242980957]
[120, 49.194053087159794, 75382.60768127441]
[130, 58.23193842437186, 89218.40689849854]
[140, 56.185101312383345, 86090.58374023438]
[150, 49.17012419837573, 75338.5138015747]
[160, 49.268999891555026, 75532.68563842773]
[170, 49.23782528411626, 75416.74452972412]
[180, 49.18262344738833, 75358.25327301025]
[190, 49.18571448263861, 75349.

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [380, 49.20412660205333, 75406.86559295654] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 49.55358838412531, 75938.05898284912]
[10, 49.24298340720234, 75428.65898132324]
[20, 49.22485488015118, 75396.75902557373]
[30, 49.32277317893723, 75567.83193969727]
[40, 49.26235868787641, 75466.42098236084]
[50, 49.22656614039648, 75382.28865814209]
[60, 49.258731762360654, 75439.04833221436]
[70, 49.21540269776984, 75389.41025543213]
[80, 49.20013753925851, 75389.2894897461]
[90, 49.200990144017474, 75375.6109161377]
[100, 49.51270894780171, 75820.0996017456]
[110, 49.22972721468375, 75424.67574310303]
[120, 49.23251865675804, 75384.12921905518]
[130, 49.27932499718728, 75520.98466491699]
[140, 49.26975576435617, 75481.27462768555]
[150, 49.273656043309146, 75489.41920471191]
[160, 49.225327967352406, 75394.9866027832]
[170, 49.27232575976817, 75461.44284820557]
[180, 49.20441474117747, 75395.4423904419]
[190, 49.24374386162422, 75463.728263

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 49.199056615406164, 75361.83853912354] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 7.220219777084829, 11071.234227180481]
[10, 7.210130537770125, 11045.764811515808]
[20, 7.239790388560482, 11073.551614761353]
[30, 7.20715834017522, 11047.057676315308]
[40, 7.195957659430043, 11038.084864616394]
[50, 7.1844488217376234, 11021.33557510376]
[60, 7.221463169815023, 11063.183999061584]
[70, 7.201411183758131, 11044.2991065979]
[80, 7.187721675743322, 11023.507776260376]
[90, 7.2090545018098995, 11009.211338996887]
[100, 7.184484989773822, 11032.74584388733]
[110, 7.199054582934466, 11032.99010181427]
[120, 7.19349778165394, 11026.836469650269]
[130, 7.196321994766554, 11028.489280700684]
[140, 7.188691781644099, 10990.115491867065]
[150, 7.189272520125068, 11003.174954414368]
[160, 7.201757214399293, 11023.75177192688]
[170, 7.2068577553522495, 11054.320140838623]
[180, 7.194770547802081, 10995.94469165802]
[190, 7.19273863386547

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [260, 7.193476287876657, 11023.505520820618] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 25.32148116188946, 38763.646156311035]
[10, 25.182253484003848, 38590.42110443115]
[20, 25.370368830528957, 38848.108001708984]
[30, 25.18193351008562, 38581.300815582275]
[40, 25.16848152549086, 38556.553787231445]
[50, 25.45383160045813, 38998.846897125244]
[60, 25.199125802859314, 38594.30823516846]
[70, 25.182110661937426, 38589.5537071228]
[80, 25.215999994203255, 38649.49802017212]
[90, 25.223002082685266, 38633.99574279785]
[100, 25.196898764169557, 38599.8200378418]
[110, 25.189713859060102, 38599.03638076782]
[120, 25.163833989175743, 38559.25389099121]
[130, 25.173507110257063, 38568.02142715454]
[140, 25.19782139551546, 38558.35412979126]
[150, 25.19143143895401, 38573.42198944092]
[160, 25.391203621348886, 38899.903186798096]
[170, 25.253678765060386, 38660.8662109375]
[180, 25.18019403303261, 38580.42328262329]
[190, 25.19475769560

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 25.09765838393966, 38445.510345458984] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 4.251019384157564, 6521.66766166687]
[10, 4.220256223691039, 6474.408753395081]
[20, 4.233608739494343, 6492.793440818787]
[30, 4.213841546920198, 6463.41499376297]
[40, 4.225766323565192, 6472.907464027405]
[50, 4.261054395384328, 6511.471202850342]
[60, 4.223737380834534, 6481.641915798187]
[70, 4.254135127478418, 6502.858449935913]
[80, 4.226332739501336, 6481.2013330459595]
[90, 4.245850522586633, 6514.6941022872925]
[100, 4.2149549380294955, 6445.363321781158]
[110, 4.231689130046037, 6488.119964122772]
[120, 4.245919307280459, 6506.056579589844]
[130, 4.222527617262798, 6456.826749324799]
[140, 4.219892616060322, 6471.805478096008]
[150, 4.223304156223725, 6484.872572422028]
[160, 4.238085811816681, 6508.153966426849]
[170, 4.243921383553945, 6520.023809909821]
[180, 4.241028862895916, 6506.383654594421]
[190, 4.230390067822628, 6498.9715

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 4.259399903972217, 6523.86442565918] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 13.26905396090475, 20356.36393737793]
[10, 13.254908769622483, 20297.608501434326]
[20, 13.208947280057105, 20252.11197090149]
[30, 13.233811384704032, 20277.06005859375]
[40, 13.20075909836174, 20242.967010498047]
[50, 13.21520101059199, 20267.637004852295]
[60, 13.23751305537809, 20252.137926101685]
[70, 13.231963509990404, 20258.001348495483]
[80, 13.19888304668058, 20248.25938796997]
[90, 13.235221799297681, 20277.8058052063]
[100, 13.214097746358528, 20215.349172592163]
[110, 13.278569754358994, 20336.99924850464]
[120, 13.199226938402061, 20246.360328674316]
[130, 13.246273263627494, 20296.197509765625]
[140, 13.234448597263107, 20258.52487564087]
[150, 13.223262293843003, 20266.989316940308]
[160, 13.216926648162364, 20259.01576423645]
[170, 13.24653097605892, 20298.176315307617]
[180, 13.231781222490355, 20275.142457962036]
[190, 13.26671

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 13.213150358075573, 20212.332027435303] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 3.4907181586671436, 5364.982429265976]
[10, 3.5020098614630437, 5385.969580173492]
[20, 3.4761679455754653, 5348.375873565674]
[30, 3.493048132866543, 5369.8297963142395]
[40, 3.4591767271256013, 5304.920597553253]
[50, 3.636115964336744, 5600.265036582947]
[60, 3.472972784590161, 5294.667067527771]
[70, 3.4817609780762275, 5336.637389183044]
[80, 3.4934145395811793, 5337.234825134277]
[90, 3.408185270062937, 5230.993200302124]
[100, 3.407912748600733, 5209.18724155426]
[110, 3.399333878067704, 5215.263561248779]
[120, 3.412983393855879, 5238.883065700531]
[130, 3.442101537091925, 5275.797254085541]
[140, 3.4700861702388632, 5315.089317321777]
[150, 3.4510725257290873, 5293.895131587982]
[160, 3.4071630581863244, 5189.984443187714]
[170, 3.417115849863456, 5237.610065937042]
[180, 3.4002004525680145, 5194.839154720306]
[190, 3.416967123669059,

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 3.4355078836022717, 5248.397716999054] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 17.811038241373964, 27271.134956359863]
[10, 17.585357783046156, 26937.595973968506]
[20, 17.60614866749736, 26984.55609512329]
[30, 17.62558351422103, 27004.287170410156]
[40, 17.604524507846286, 26975.55235671997]
[50, 17.57947947898048, 26915.29682159424]
[60, 17.7296633732848, 27153.024879455566]
[70, 17.873977329961313, 27398.551517486572]
[80, 18.157648059157726, 27809.25820541382]
[90, 17.80630143068478, 27273.665397644043]
[100, 18.161436466884364, 27793.955360412598]
[110, 17.98215518234293, 27528.6435546875]
[120, 17.597340038488802, 26954.87921524048]
[130, 18.026270196581013, 27614.522789001465]
[140, 17.736386152222636, 27159.36728286743]
[150, 17.881007448500192, 27416.488124847412]
[160, 19.277035745565946, 29548.63712310791]
[170, 17.953501101262265, 27479.221641540527]
[180, 18.755597328703956, 28730.96438217163]
[190, 17.67168

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 20.34045782412935, 31182.47979736328] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 7.364984521044141, 11265.019958496094]
[10, 7.1974166080783615, 11033.681378364563]
[20, 7.182776858227682, 11012.033140182495]
[30, 7.2904716969781385, 11166.846658706665]
[40, 7.17326598503571, 11003.51290512085]
[50, 7.171668528887995, 10993.017140388489]
[60, 7.174799289778069, 10986.663332939148]
[70, 7.203391511508753, 11026.744850158691]
[80, 7.2099995139996, 11029.899762153625]
[90, 7.178239280810244, 11000.044486045837]
[100, 7.185250386245568, 11014.138723373413]
[110, 7.201892706495041, 11045.093845367432]
[120, 7.194225024305498, 11006.87321472168]
[130, 7.259915066138882, 11118.00018787384]
[140, 7.174244098190228, 10974.27128314972]
[150, 7.184714083883221, 10992.214470863342]
[160, 7.176713698215335, 10997.30408000946]
[170, 7.173706699600419, 10994.915661811829]
[180, 7.216250581467432, 11043.011164665222]
[190, 7.21694058662295,

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 7.409826847031596, 11365.662544250488] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 13.245921346599689, 20273.092193603516]
[10, 13.237742790042887, 20289.977033615112]
[20, 13.193576515187793, 20224.82515144348]
[30, 13.201784087845924, 20222.19513320923]
[40, 13.218162190509528, 20242.56378555298]
[50, 13.164577729396969, 20189.700424194336]
[60, 13.21154886997711, 20253.428340911865]
[70, 13.259568880494518, 20298.19228553772]
[80, 13.182445526123047, 20194.58420944214]
[90, 13.181099689971685, 20191.587043762207]
[100, 13.215091882113066, 20226.59238433838]
[110, 13.242116116356913, 20289.040603637695]
[120, 13.198785791820397, 20230.682010650635]
[130, 13.170641960114162, 20164.089267730713]
[140, 13.192309959750263, 20209.574369430542]
[150, 13.207169390845236, 20227.341426849365]
[160, 13.167947203932481, 20172.86767578125]
[170, 13.167929878434064, 20174.666471481323]
[180, 13.133507261699547, 20127.525115966797]
[190,

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 13.097393730601816, 20095.97529411316] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 17.526228018280108, 26856.275382995605]
[10, 17.46295026823995, 26750.144104003906]
[20, 17.472316595032694, 26782.3761177063]
[30, 17.487433577953368, 26798.103630065918]
[40, 17.524671293114245, 26840.641342163086]
[50, 17.541084884663476, 26856.486183166504]
[60, 17.553328364698444, 26884.471729278564]
[70, 17.48138975287853, 26776.758415222168]
[80, 17.558371172872597, 26906.73222732544]
[90, 17.50438764387883, 26786.458751678467]
[100, 17.474242827910977, 26762.204891204834]
[110, 17.570692059887918, 26902.600032806396]
[120, 17.47474791424704, 26766.34645462036]
[130, 17.47777490391744, 26757.407497406006]
[140, 17.497246363766823, 26806.659706115723]
[150, 17.545796003416374, 26889.706089019775]
[160, 17.478830989907365, 26776.684127807617]
[170, 17.7354478487458, 27208.80648803711]
[180, 17.46981857339645, 26745.671810150146]
[190, 17.7

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 18.988072878386895, 29091.705226898193] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 25.340220192393808, 38819.58018875122]
[10, 25.377228398235907, 38879.93911743164]
[20, 25.919201435059232, 39706.66779708862]
[30, 25.19824803404646, 38617.11011123657]
[40, 25.323632364795973, 38788.645458221436]
[50, 25.22926298943263, 38659.05611419678]
[60, 25.37804232813982, 38890.47583389282]
[70, 25.21915036642209, 38653.039558410645]
[80, 25.232196232668723, 38630.153160095215]
[90, 25.287774285199436, 38758.0986289978]
[100, 25.265594681622776, 38707.45376205444]
[110, 25.34647546673568, 38799.323863983154]
[120, 28.467341500225018, 43613.28967666626]
[130, 25.17967426310009, 38576.65171432495]
[140, 25.220961784880714, 38643.51441192627]
[150, 25.18680888741197, 38572.64513397217]
[160, 25.18134291925256, 38580.590980529785]
[170, 26.46353793579978, 40547.217586517334]
[180, 25.67926932999733, 39347.60200881958]
[190, 25.21159040087

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 29.19800867175309, 44733.2232131958] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 5.617602701909237, 8584.495675086975]
[10, 5.491348926454549, 8394.757570266724]
[20, 5.710732971408037, 8751.408754348755]
[30, 5.529252201085303, 8474.21453666687]
[40, 5.497904265207037, 8427.0021982193]
[50, 5.539780780478495, 8480.731350421906]
[60, 5.458960383118911, 8357.596013069153]
[70, 5.450112689568542, 8358.613034248352]
[80, 5.456571732115186, 8367.553896903992]
[90, 5.5222753880851885, 8445.328283309937]
[100, 5.490045546862849, 8374.354510307312]
[110, 5.459276149540595, 8373.12493133545]
[120, 5.574178806172961, 8512.346039295197]
[130, 5.455844090750572, 8373.388621330261]
[140, 5.502542581944179, 8427.42822265625]
[150, 5.537716506977928, 8472.440383911133]
[160, 5.4591241673452116, 8363.164922714233]
[170, 5.481374802228365, 8405.89762020111]
[180, 5.599531526976404, 8579.1199426651]
[190, 5.451808173102436, 8359.692193031311]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 5.386773788586609, 8262.543182373047] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 5.476240896369396, 8406.707580566406]
[10, 5.4405049163429915, 8336.548017978668]
[20, 5.437362995844908, 8357.691541194916]
[30, 5.56246155173598, 8516.39223575592]
[40, 5.507655100162596, 8444.475249767303]
[50, 5.424399398948132, 8320.924368858337]
[60, 5.420474643495624, 8305.363582611084]
[70, 5.429989863936023, 8315.131604194641]
[80, 5.419031819537787, 8315.612982749939]
[90, 5.47031062856978, 8376.041096687317]
[100, 5.3805285639302225, 8242.237845420837]
[110, 5.383075448302625, 8243.811965942383]
[120, 5.347821109288667, 8217.665076255798]
[130, 5.382835433003796, 8249.151242256165]
[140, 5.3644889988413675, 8204.288119316101]
[150, 5.365227319553067, 8219.034438610077]
[160, 5.375281029519462, 8231.02914237976]
[170, 5.3671251918254885, 8222.548994064331]
[180, 5.381364371695655, 8224.069343566895]
[190, 5.376617655741639, 8228.279791

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 5.3645088865613815, 8210.110482215881] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 25.253827969025693, 38701.335094451904]
[10, 25.219132239140045, 38645.235538482666]
[20, 25.213187568803992, 38638.568950653076]
[30, 25.246824842208984, 38651.87650299072]
[40, 25.239802726566325, 38656.08157348633]
[50, 25.17064557685553, 38578.54986190796]
[60, 25.19569328248345, 38597.677768707275]
[70, 25.22367694794976, 38659.73076629639]
[80, 25.216202325049018, 38587.71215057373]
[90, 25.236478710921563, 38640.71747589111]
[100, 25.250246438905403, 38685.540924072266]
[110, 25.18725819251556, 38597.730728149414]
[120, 25.250061652679044, 38655.451625823975]
[130, 25.199500332924465, 38625.69470977783]
[140, 25.174269628898283, 38576.46764755249]
[150, 25.192541570638554, 38578.693183898926]
[160, 25.219648032524567, 38623.406032562256]
[170, 25.201337286448666, 38665.139614105225]
[180, 25.23300948279956, 38649.052169799805]
[190, 25.2

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 25.148370941998753, 38525.338764190674] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 5.510801093074111, 8446.369108200073]
[10, 5.496059676063279, 8410.89286518097]
[20, 5.5293385889138005, 8471.969139099121]
[30, 5.429073460108306, 8332.50236415863]
[40, 5.499447685620181, 8429.870421409607]
[50, 5.41717398509033, 8290.245662212372]
[60, 5.455215078109861, 8355.742191314697]
[70, 5.438152576551114, 8332.094625473022]
[80, 5.497967498420735, 8415.958005905151]
[90, 5.4270801513064315, 8313.9182138443]
[100, 5.420798096893351, 8304.373675346375]
[110, 5.453995809853855, 8344.14621591568]
[120, 5.428170999721198, 8323.086682319641]
[130, 5.4576492683072, 8357.174057006836]
[140, 5.582258923557969, 8535.254145622253]
[150, 5.494897201851828, 8422.993500709534]
[160, 5.46339294057602, 8379.141674995422]
[170, 5.43183599937678, 8309.882678031921]
[180, 5.455070895252278, 8362.81089925766]
[190, 5.4364463430160646, 8336.550159454346

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 5.359000624315546, 8179.571119308472] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 5.650775276338463, 8671.381410598755]
[10, 5.54977149315976, 8535.049955368042]
[20, 5.510770683811477, 8436.459645271301]
[30, 5.522854536693962, 8483.957187652588]
[40, 5.506279903043343, 8444.962621688843]
[50, 5.558110582610646, 8531.296452522278]
[60, 5.475111882306888, 8386.553521156311]
[70, 5.464073060386797, 8395.749444007874]
[80, 5.476558027317257, 8402.386767864227]
[90, 5.45480938244115, 8378.602392196655]
[100, 5.453015793708223, 8375.528444290161]
[110, 5.454941896483419, 8389.170750617981]
[120, 5.478924105123814, 8366.107786178589]
[130, 5.455156249103596, 8360.966100692749]
[140, 5.466941161193051, 8376.219821929932]
[150, 5.460040385044586, 8378.154504776001]
[160, 5.4688303159360165, 8359.065012931824]
[170, 5.449722900091824, 8363.844736099243]
[180, 5.481778678943844, 8372.99199104309]
[190, 5.495737741883679, 8416.57546997

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 5.47099887982361, 8341.12936592102] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 13.270398202203895, 20328.590488433838]
[10, 13.24854858488078, 20317.13682937622]
[20, 13.306338744748667, 20408.4688911438]
[30, 13.226309808676298, 20261.693798065186]
[40, 13.2094144161314, 20247.6416721344]
[50, 13.202254587925445, 20234.096433639526]
[60, 13.211370339904068, 20236.346979141235]
[70, 13.20104090289721, 20211.06093788147]
[80, 13.354847168486673, 20437.335744857788]
[90, 13.303902678327834, 20402.787143707275]
[100, 13.245603384610567, 20271.13264465332]
[110, 13.204881816246491, 20230.05023765564]
[120, 13.461770356479574, 20612.90524482727]
[130, 13.210270533051254, 20261.913162231445]
[140, 13.42353337116092, 20563.08787727356]
[150, 13.630217267078768, 20876.53154182434]
[160, 13.189386056546443, 20211.689443588257]
[170, 13.190454646127964, 20221.949668884277]
[180, 13.20380621852825, 20215.534589767456]
[190, 13.22456265

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 16.402757942209668, 25125.262020111084] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 33.59516389811942, 51468.69696807861]
[10, 33.54065401560333, 51376.12897491455]
[20, 33.82290152407813, 51846.202072143555]
[30, 33.5034286409383, 51340.73063659668]
[40, 33.476926151205916, 51260.360260009766]
[50, 34.16090584299272, 52344.234550476074]
[60, 34.169882258920694, 52369.40948486328]
[70, 33.501246046459705, 51359.88333129883]
[80, 33.53740407281383, 51372.356269836426]
[90, 33.60017408465592, 51447.92951965332]
[100, 33.523825234594916, 51355.9144821167]
[110, 33.835439542566206, 51808.46738433838]
[120, 33.51574695079196, 51393.64669799805]
[130, 33.47516474711366, 51331.72847747803]
[140, 33.61882024520994, 51497.41158294678]
[150, 33.452400725441876, 51295.504318237305]
[160, 33.44315911086359, 51322.958923339844]
[170, 33.42108034278332, 51250.33840179443]
[180, 33.444946119741736, 51237.179191589355]
[190, 33.4854255133447

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 33.62050149085939, 51507.18606567383] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 3.513903454918774, 5367.4483342170715]
[10, 3.459498403277783, 5298.015577793121]
[20, 3.475135785481326, 5325.72062253952]
[30, 3.477416434736227, 5346.115303993225]
[40, 3.500157075669062, 5356.012333393097]
[50, 3.4975320903812936, 5379.243298530579]
[60, 3.5134148480998006, 5371.156842708588]
[70, 3.4934049911660874, 5363.648013591766]
[80, 3.494642704336201, 5363.362739562988]
[90, 3.5211186069732547, 5399.065121173859]
[100, 3.566020352722148, 5464.915285110474]
[110, 3.5372635266177026, 5425.606779575348]
[120, 3.478874431733363, 5304.936037063599]
[130, 3.4807044401492524, 5371.319539070129]
[140, 3.470788019132988, 5317.531506538391]
[150, 3.4934633067631533, 5387.000785827637]
[160, 3.5244610213112892, 5407.022938251495]
[170, 3.4641959505355078, 5303.081403970718]
[180, 3.546912930652927, 5419.294477462769]
[190, 3.4691003430294307, 5

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 3.4552733369035447, 5290.318769931793] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 5.516529250705211, 8476.823435783386]
[10, 5.504610041102914, 8470.703154563904]
[20, 5.497798603446303, 8418.79364490509]
[30, 5.507086596352002, 8428.2437210083]
[40, 5.4809045822751115, 8392.42056465149]
[50, 5.4879189608302505, 8430.738691329956]
[60, 5.492684648802635, 8423.070785522461]
[70, 5.495672227819657, 8428.33462715149]
[80, 5.504194628787725, 8405.61438369751]
[90, 5.52658524587945, 8504.32648563385]
[100, 5.484561419051247, 8404.171030044556]
[110, 5.4599995743823735, 8373.549193382263]
[120, 5.523173921724523, 8426.951028823853]
[130, 5.4733994150286245, 8413.920838356018]
[140, 5.4673258971918965, 8389.325669288635]
[150, 5.4688562388208455, 8390.193697929382]
[160, 5.480843433823349, 8376.561886787415]
[170, 5.494273518146485, 8422.682786941528]
[180, 5.470438401941842, 8396.614825248718]
[190, 5.473960899497448, 8377.1103181

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 5.437500551537497, 8317.042335987091] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 49.42116749815779, 75731.00859069824]
[10, 49.236688529231216, 75409.94254302979]
[20, 49.30200818811322, 75539.78045654297]
[30, 49.60360726239476, 76028.10994720459]
[40, 49.19895693527501, 75389.18611907959]
[50, 49.185583727166794, 75357.01206970215]
[60, 49.27783484994276, 75493.23547363281]
[70, 49.21447360484469, 75377.92835235596]
[80, 49.17946137211653, 75333.39560699463]
[90, 49.183800535475925, 75365.40968322754]
[100, 49.2372568095633, 75425.95761871338]
[110, 49.19285775226962, 75339.15281677246]
[120, 49.2337442552452, 75442.26602935791]
[130, 49.190645382236255, 75350.6983718872]
[140, 49.183800102213965, 75373.245262146]
[150, 49.21393128039009, 75417.22176361084]
[160, 49.21334597834097, 75396.46871185303]
[170, 49.180787223437434, 75337.09218597412]
[180, 49.608214047185434, 75963.05506896973]
[190, 62.73287201923739, 96046.034

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 57.12751056880303, 87504.66976165771] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 4.275524848435006, 6526.139853000641]
[10, 4.212690535164377, 6445.979467868805]
[20, 4.2083328299360545, 6449.574989318848]
[30, 4.213059145850239, 6443.327093601227]
[40, 4.2432236260595895, 6476.245148658752]
[50, 4.24645081605986, 6495.009843826294]
[60, 4.24317324939658, 6478.514399051666]
[70, 4.265162951329981, 6545.076111793518]
[80, 4.226118446018926, 6488.342780590057]
[90, 4.207724137343564, 6465.523302078247]
[100, 4.235117340523643, 6492.119307041168]
[110, 4.215631760131597, 6442.968263626099]
[120, 4.209585809520891, 6443.876799106598]
[130, 4.247374536163191, 6503.259771347046]
[140, 4.315503047278282, 6615.5542249679565]
[150, 4.3013419488702676, 6593.364197731018]
[160, 4.2081543720733405, 6436.989390850067]
[170, 4.2724172599010615, 6534.378859996796]
[180, 4.232942141067266, 6485.909141540527]
[190, 4.283742035337902, 6566.21

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 4.2111094091330745, 6448.58171415329] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 13.269711011383615, 20315.576679229736]
[10, 13.201979857511994, 20216.493183135986]
[20, 13.17494081641613, 20228.252420425415]
[30, 13.205146745975584, 20210.02904701233]
[40, 13.188803442464485, 20192.25069999695]
[50, 13.218695548433548, 20242.44766807556]
[60, 13.195167551463951, 20204.8498210907]
[70, 13.167571772483248, 20180.197692871094]
[80, 13.171491927951186, 20167.72544670105]
[90, 13.200221440188256, 20221.126989364624]
[100, 13.172404475996451, 20200.37854194641]
[110, 13.225243631915697, 20268.54631614685]
[120, 13.213043298796014, 20258.775663375854]
[130, 13.17585172603398, 20195.404455184937]
[140, 13.182645918495039, 20191.199480056763]
[150, 13.187090993238803, 20185.53533554077]
[160, 13.126838872077881, 20103.24370956421]
[170, 13.126608211128893, 20119.167348861694]
[180, 13.1063796484128, 20089.093002319336]
[190, 13.131

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 14.385957701710435, 22033.034788131714] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 9.508803164678827, 14584.091905593872]
[10, 9.461747350020445, 14476.686279296875]
[20, 9.423263856076074, 14448.572284698486]
[30, 9.477996181258955, 14542.573026657104]
[40, 9.434249281571988, 14460.980318069458]
[50, 9.430441886264413, 14464.165391921997]
[60, 9.428227986119124, 14452.83332824707]
[70, 9.42330304387344, 14443.98768234253]
[80, 9.425197967350016, 14426.859830856323]
[90, 9.445112995317025, 14485.317750930786]
[100, 9.427790629334611, 14435.149810791016]
[110, 9.433593809137768, 14449.991325378418]
[120, 9.423671443530846, 14458.766145706177]
[130, 9.435739956698903, 14452.812093734741]
[140, 9.43422785751502, 14440.929935455322]
[150, 9.487058405465309, 14513.322410583496]
[160, 9.44231953259859, 14444.654655456543]
[170, 9.42115697910518, 14439.998339653015]
[180, 9.41981622257681, 14434.98790550232]
[190, 9.438750958006937

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

Old model still stands:
Current loss:  [490, 13.070631265017756, 20028.19465637207] 
Best loss: [490, 3.433227788063627, 5257.182981014252]
[0, 7.2281988800038866, 11069.042422294617]
[10, 7.296327894723758, 11172.568161964417]
[20, 7.24367381698469, 11085.388676643372]
[30, 7.232542816092389, 11075.796612739563]
[40, 7.2210150063816, 11053.273871421814]
[50, 7.339766492420326, 11247.643494606018]
[60, 7.216008122222542, 11052.559129714966]
[70, 7.229036391559531, 11100.294623374939]
[80, 7.266934467669255, 11121.331694602966]
[90, 7.2013199273350965, 11000.240265846252]
[100, 7.227924704862948, 11056.10896396637]
[110, 7.195872515362175, 11037.959086418152]
[120, 7.254515327921736, 11118.891551017761]
[130, 7.556383086869362, 11584.726097106934]
[140, 7.297148060238393, 11178.423092842102]
[150, 7.237752941818835, 11092.271032333374]
[160, 7.21236638985477, 11039.9593334198]
[170, 7.261471201794577, 11093.417952537537]
[180, 7.216116129573892, 11052.30354309082]
[190, 7.26195337878194

Featurizing p:   0%|          | 0/1000 [00:00<?, ?it/s]

Featurizing q:   0%|          | 0/3000 [00:00<?, ?it/s]

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



[100, 33.67898583972423, 51601.20234680176]
[110, 33.502887835390574, 51319.5551071167]
[120, 33.464323123503604, 51299.46531677246]
[130, 33.55226404673126, 51416.02634429932]
[140, 33.484492762593, 51307.63957977295]
[150, 33.521741568264076, 51344.49035644531]
[160, 33.5290400303375, 51327.16030883789]
[170, 33.484469236966525, 51304.66120147705]
[180, 33.51798505260179, 51349.04504394531]
[190, 33.44905003679639, 51233.41590118408]
[200, 33.47063831996669, 51298.23156738281]
[210, 33.531719110653235, 51353.78197479248]
[220, 33.736507694652744, 51681.36962890625]
[230, 33.5739697936932, 51474.42967224121]
[240, 33.56911430657688, 51440.28953552246]


In [None]:
a = torch.zeros(10,5,1)

In [None]:
a[:,-1,:].size()

In [None]:
b = torch.zeros(30,1)
c = torch.zeros(1,1)


In [None]:
torch.cat((b[1:],c))

In [None]:
23