In [1]:
#!/usr/bin/env python3
import os
import sys

# BEGIN THREAD SETTINGS this sets the number of threads used by numpy in the program
# (should be set to 1 to avoid implicit parallelism)
implicit_num_threads = 1
os.environ["OMP_NUM_THREADS"] = str(implicit_num_threads)
os.environ["MKL_NUM_THREADS"] = str(implicit_num_threads)
os.environ["OPENBLAS_NUM_THREADS"] = str(implicit_num_threads)
# END THREAD SETTINGS

import numpy
import torch
from numpy import random
import matplotlib
import pickle
matplotlib.use('agg')
from matplotlib import pyplot as plt
import threading
import time
import pandas
from collections import deque

from tqdm import tqdm
from google.colab import files, drive

In [2]:
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [3]:
path = "/content/gdrive/MyDrive/data_daily.csv"
# reading the CSV file
csvFile = pandas.read_csv(path)
 
# displaying the contents of the CSV file
print(csvFile)

         # Date  Receipt_Count
0    2021-01-01        7564766
1    2021-01-02        7455524
2    2021-01-03        7095414
3    2021-01-04        7666163
4    2021-01-05        7771289
..          ...            ...
360  2021-12-27       10350408
361  2021-12-28       10219445
362  2021-12-29       10313337
363  2021-12-30       10310644
364  2021-12-31       10211187

[365 rows x 2 columns]


In [10]:
def train_test_split(Xs, Ys, test_size, random_state):
    random.seed(random_state)
    random.shuffle(Xs)
    random.shuffle(Ys)
    test_set_size = int(test_size * len(Ys))
    Xs_tr, Xs_va, Ys_tr, Ys_va = Xs[test_set_size:], Xs[:test_set_size], Ys[test_set_size:], Ys[:test_set_size]
    return Xs_tr, Xs_va, Ys_tr, Ys_va

In [11]:
# setting the days ahead range for predicting the approximate number of the scanned receipts for a future day
day_range = 30 # using the data from day 0 to day 29 to predict day 30

# constructing features and target variables
Receipt_Count = csvFile["Receipt_Count"].array.to_numpy()
Xs = [[Receipt_Count[j] for j in range(i, i + day_range)] for i in range(335)]
Ys = Receipt_Count[day_range:]
assert len(Xs) == len(Ys)
# perform train-validation (0.8 vs 0.2) split
Xs_tr, Xs_va, Ys_tr, Ys_va = train_test_split(Xs, 
                                              Ys, 
                                              test_size = 0.2, 
                                              random_state = 123)

In [273]:
# normalization of input data
mean = numpy.mean(Receipt_Count)
std = numpy.std(Receipt_Count)
Xs_tr = numpy.array(Xs_tr, dtype=float).reshape(day_range, -1)
Xs_va = numpy.array(Xs_va, dtype=float).reshape(day_range, -1)
Ys_tr = numpy.array(Ys_tr, dtype=float).reshape(1, -1)
Ys_va = numpy.array(Ys_va, dtype=float).reshape(1, -1)
Xs_tr = (Xs_tr - mean) / std
Ys_tr = (Ys_tr - mean) / std
Xs_tr = (Xs_va - mean) / std
Ys_tr = (Ys_va - mean) / std
receipts_dataset = (Xs_tr, Xs_va, Ys_tr, Ys_va)
print(Xs_tr.shape, Xs_va.shape, Ys_tr.shape, Ys_va.shape)

(30, 67) (30, 67) (1, 67) (1, 67)


In [274]:
# weight matrix initialization
W0 = numpy.zeros((len(Ys_tr), len(Xs_tr)))
print(W0.shape)

(1, 30)


In [275]:
# SGD + Momentum (threaded) for Ridge Regression
#
# Xs              training examples (d * n)
# Ys              training labels   (c * n)
# gamma           L2 regularization constant
# W0              the initial value of the parameters (c * d)
# alpha           step size/learning rate
# beta            momentum hyperparameter
# B               minibatch size
# num_epochs      number of epochs (passes through the training set) to run
# monitor_period  how frequently, in terms of batches (not epochs) to output the parameter vector
# num_threads     how many threads to use
#
# returns         the final model arrived at at the end of training
def ridge_sgd_mss_with_momentum_threaded(Xs, Ys, gamma, W0, alpha, beta, B, num_epochs, num_threads):
    (d, n) = Xs.shape
    (c, d) = W0.shape
    # perform global setup/initialization/allocation
    V = numpy.zeros(W0.shape)
    W = numpy.copy(W0)
    gradient = numpy.zeros(W0.shape)
    Bt = int(B / num_threads)

    # construct the barrier object
    iter_barrier = threading.Barrier(num_threads + 1)

    # a function for each thread to run
    def thread_main(ithread):
        # perform any per-thread allocations
        # avoid memory allocation in the running of program by pre-allocating memories ahead
        XdotX = numpy.zeros((d, d))
        WdotXdotX = numpy.zeros(W0.shape)
        YdotX = numpy.zeros(W0.shape)
        gammaW = numpy.zeros(W0.shape)
        multinomial_logreg_grad_i = numpy.zeros(W0.shape)

        slices_X = []
        slices_Y = []
        for ibatch in range(int(n/B)):
            ii = range(ibatch*B + ithread*Bt, ibatch*B + (ithread+1)*Bt)
            slices_X.append(numpy.ascontiguousarray(Xs[:,ii]))
            slices_Y.append(numpy.ascontiguousarray(Ys[:,ii]))
        # gradint calculation (uses only pre-allocated memories to improve performance of SGD)
        for it in range(num_epochs):
            for ibatch in range(int(n/B)):
                # work done by thread in each iteration;
                # this section of code primarily uses numpy operations with the "out=" argument specified
                numpy.dot(slices_X[ibatch], numpy.transpose(slices_X[ibatch]), out=XdotX)
                numpy.dot(W, XdotX, out=WdotXdotX)
                numpy.dot(slices_Y[ibatch], numpy.transpose(slices_X[ibatch]), out=YdotX)
                numpy.multiply(gamma, W, out=gammaW)
                numpy.subtract(WdotXdotX, YdotX, out=multinomial_logreg_grad_i)
                numpy.add(multinomial_logreg_grad_i, gammaW, out=multinomial_logreg_grad_i)
                
                iter_barrier.wait() # wait for all threads to finish computation before moving up to next step
                numpy.add(gradient, multinomial_logreg_grad_i, out=gradient)
                
                iter_barrier.wait()

    worker_threads = [threading.Thread(target=thread_main, args=(it,)) for it in range(num_threads)]

    for t in worker_threads:
        print("running thread ", t)
        t.start()

    print("Running minibatch sequential-scan SGD with momentum (%d threads)" % num_threads)
    # gradient & momentum update:
    # v <- beta * v - alpha * gradient
    # w <- w + v
    for it in tqdm(range(num_epochs)):
        for ibatch in range(int(n/B)):
            numpy.multiply(gradient, 0, out=gradient)
            iter_barrier.wait()
            # work done on a single thread at each iteration;
            # this section of code primarily uses numpy operations with the "out=" argument specified
            numpy.divide(gradient, B, out=gradient)
            numpy.multiply(beta, V, out=V)
            numpy.multiply(alpha, gradient, out=gradient)
            numpy.subtract(V, gradient, out=V)
            numpy.add(W, V, out=W)
            iter_barrier.wait()

    for t in worker_threads:
        t.join()

    print("current loss: " + str((W @ Xs - Ys) @ (W @ Xs - Ys).T + gamma * W @ W.T)) # report current loss
    # return the learned model
    return W

In [276]:
# customized hyperparameter tryout
sgd_mss_with_momentum_threaded(Xs=Xs_tr, Ys=Ys_tr, gamma=0.0001, W0=W0, alpha=0.001, beta=0.9, B=8, num_epochs=200, num_threads=8)

running thread  <Thread(Thread-7785, initial)>
running thread  <Thread(Thread-7786, initial)>
running thread  <Thread(Thread-7787, initial)>
running thread  <Thread(Thread-7788, initial)>
running thread  <Thread(Thread-7789, initial)>
running thread  <Thread(Thread-7790, initial)>
running thread  <Thread(Thread-7791, initial)>
running thread  <Thread(Thread-7792, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 200/200 [00:01<00:00, 114.38it/s]

current loss: [[37.16977821]]





array([[ 0.03756048,  0.12881889,  0.04096173,  0.25722494,  0.01240086,
        -0.11211072, -0.25063236,  0.15831235,  0.39259807,  0.24949661,
         0.13850566,  0.0515872 , -0.22609018, -0.02822651,  0.31771422,
         0.17147773,  0.20633168, -0.10351718,  0.09662696,  0.20787569,
         0.12132945, -0.12545416, -0.15158585, -0.06173128, -0.05760427,
         0.13556961,  0.0443049 ,  0.2185531 , -0.27029242, -0.11574262]])

In [161]:
# gradient descent to do the inner optimization step of Bayesian optimization
#
# objective     the objective function to minimize, as a function that takes a torch tensor and returns an expression
# x0            initial value to assign to variable (torch tensor)
# alpha         learning rate/step size
# num_iters     number of iterations of gradient descent
#
# returns     (obj_min, x_min), where
#       obj_min     the value of the objective after running iterations of gradient descent
#       x_min       the value of x after running iterations of gradient descent
def gradient_descent(objective, x0, alpha, num_iters):
    x = x0.detach().clone()  # create a fresh copy of x0
    x.requires_grad = True   # make it a target for differentiation
    opt = torch.optim.SGD([x], alpha)
    for it in range(num_iters):
        opt.zero_grad()
        f = objective(x)
        f.backward()
        opt.step()
    x.requires_grad = False  # make x no longer require gradients
    return (float(f.item()), x)

In [277]:
# compute the Gaussian RBF kernel matrix for a vector of data points (in PyTorch)
#
# Xs        points at which to compute the kernel (size: d x m)
# Zs        other points at which to compute the kernel (size: d x n)
# gamma     gamma parameter for the RBF kernel
#
# returns   an (m x n) matrix Sigma where Sigma[i,j] = K(Xs[:,i], Zs[:,j])
def rbf_kernel_matrix(Xs, Zs, gamma):
    m = Xs.shape[1] if len(Xs.shape) > 1 else 1
    n = Zs.shape[1] if len(Zs.shape) > 1 else 1
    sigma = [[torch.exp(-gamma * torch.linalg.norm(Xs[:, i] - Zs[:, j])**2) for j in range(n)] for i in range(m)]
    sigma = torch.tensor(sigma)
    return sigma

In [278]:
# compute the distribution predicted by a Gaussian process that uses an RBF kernel (in PyTorch)
#
# Xs            points at which to compute the kernel (size: d x n) where d is the number of parameters
# Ys            observed value at those points (size: n)
# gamma         gamma parameter for the RBF kernel
# sigma2_noise  the variance sigma^2 of the additive gaussian noise used in the model
#
# returns   a function that takes a value Xtest (size: d) and returns a tuple (mean, variance)
def gp_prediction(Xs, Ys, gamma, sigma2_noise):
    # first, do any work that can be shared among predictions
    sigma = rbf_kernel_matrix(Xs, Xs, gamma)
    n = Xs.shape[1]
    # next, define a nested function to return
    def prediction_mean_and_variance(Xtest):
        # construct mean and variance
        k = [torch.exp(-gamma * torch.linalg.norm(Xs[:, i] - Xtest)) for i in range(n)]
        k = torch.tensor(k)
        
        mean = k @ torch.linalg.inv(sigma + sigma2_noise * torch.eye(n)) @ Ys
        variance = torch.exp(-gamma * torch.linalg.norm(Xtest - Xtest)) + sigma2_noise -\
        k @ torch.linalg.inv(sigma + sigma2_noise * torch.eye(n)) @ k.T
        return (mean.reshape(()), variance.reshape(()))
    #finally, return the nested function
    return prediction_mean_and_variance

In [279]:
# run Bayesian optimization to minimize an objective
#
# objective     objective function; takes a torch tensor, returns a python float scalar
# d             dimension to optimize over
# gamma         gamma to use for RBF hyper-hyperparameter
# sigma2_noise  additive Gaussian noise parameter for Gaussian Process
# acquisition   acquisition function to use (e.g. ei_acquisition)
# random_x      function that returns a random sample of the parameter we're optimizing over (a torch tensor, e.g. for use in warmup)
# gd_nruns      number of random initializations we should use for gradient descent for the inner optimization step
# gd_alpha      learning rate for gradient descent
# gd_niters     number of iterations for gradient descent
# n_warmup      number of initial warmup evaluations of the objective to use
# num_iters     number of outer iterations of Bayes optimization to run (including warmup)
#
# returns       tuple of (y_best, x_best, Ys, Xs), where
#   y_best          objective value of best point found
#   x_best          best point found
#   Ys              vector of objective values for all points searched (size: num_iters)
#   Xs              matrix of all points searched (size: d x num_iters)
def bayes_opt(objective, d, gamma, sigma2_noise, acquisition, random_x, gd_nruns, gd_alpha, gd_niters, n_warmup, num_iters):
    y_best = float("inf")
    x_best = torch.zeros(size=(d,))
    Xs = []
    Ys = []
    # warm-up to prepare prior information for Bayesian Optimization
    for _ in range(n_warmup):
        x_i = torch.tensor([0.0001, 0.001, 0.9])
        y_i = objective(x_i)
        Xs.append(x_i)
        Ys.append(y_i)
        if y_i <= y_best:
            y_best = y_i
            x_best = x_i
    for _ in range(n_warmup, num_iters):
        Xs_vec = torch.stack(tensors=Xs, dim=1)
        Ys_vec = torch.tensor(Ys)
        prediction_fn =  gp_prediction(Xs_vec, Ys_vec, gamma, sigma2_noise)
        y = float("inf")
        x = torch.zeros(size=(d,))
        for _ in range(gd_nruns):
            x_0 = random_x(size=(d,))
            _, x_i = gradient_descent(objective=lambda x: acquisition(y_best, prediction_fn(x)[0], torch.sqrt(prediction_fn(x)[1])),\
                x0=x_0, alpha=gd_alpha, num_iters=gd_niters)
            y_i = objective(x_i)
            if y_i <= y:
                y = y_i
                x = x_i
        Xs.append(x)
        Ys.append(y)
        if y <= y_best:
            y_best = y
            x_best = x
    Xs_vec = torch.stack(tensors=Xs, dim=1)
    Ys_vec = torch.tensor(Ys)
    return y_best, x_best, Ys_vec, Xs_vec

In [280]:
# return a function that computes the lower confidence bound (LCB) acquisition function
#
# kappa     parameter for LCB
#
# returns   function that computes the LCB acquisition function
def lcb_acquisition(kappa):
    def A_lcb(Ybest, mean, stdev):
        return mean - kappa * stdev
    return A_lcb

In [286]:
# produce a function that runs SGD+Momentum on the receipts dataset, initializing the weights to zero
#
# mnist_dataset         the MNIST dataset, as returned by load_MNIST_dataset_with_validation_split
# num_epochs            number of epochs to run for
# B                     the batch size
#
# returns               a function that takes parameters
#   params                  a numpy vector of shape (3,) with entries that determine the hyperparameters, where
#       gamma = params[0]
#       alpha = params[1]
#       beta = params[2]
#                       and returns (the validation error of the final trained model after all the epochs) minus 0.9.
#                       if training diverged (i.e. any of the weights are non-finite) then return 0.1, which corresponds to an error of 1.
def receipts_dataset_sgd_mss_with_momentum(receipts_dataset, B, num_epochs, num_threads):
    def objective(params):
        Xs_tr, Xs_va, Ys_tr, Ys_va = receipts_dataset
        d = Xs_tr.shape[0]
        c = Ys_tr.shape[0]
        if torch.is_tensor(Xs_tr):
            Xs_tr = Xs_tr.numpy()
        if torch.is_tensor(Ys_tr):
            Ys_tr = Ys_tr.numpy()
        if torch.is_tensor(Xs_va):
            Xs_va = Xs_va.numpy()
        if torch.is_tensor(Ys_va):
            Ys_va = Ys_va.numpy()
        gamma, alpha, beta, W_0 = float(params[0].item()), float(params[1].item()), float(params[2].item()), numpy.zeros(shape=(c,d))
        W = sgd_mss_with_momentum_threaded(Xs=Xs_tr, Ys=Ys_tr, gamma=gamma, W0=W0, alpha=alpha, beta=beta,\
                                           B=B, num_epochs=num_epochs, num_threads=num_threads)
        Ys_pr = W @ Xs_va
        error = (numpy.sum((Ys_va - Ys_pr)**2) / numpy.sum((numpy.mean(Ys_va) - Ys_va)**2)) # use 1 - R^2 as error to select hyperparameters
        return float(error)
    return objective

In [287]:
# perform Bayesian Optimization to find optimal hyperparameters
obj = receipts_dataset_sgd_mss_with_momentum(receipts_dataset, B=8, num_epochs=40, num_threads=8)
(y_best, x_best, Ys_vec, Xs_vec) = bayes_opt(objective=obj, d=3, gamma=10, sigma2_noise=0.001, acquisition=lcb_acquisition(kappa=2.0),\
                                     random_x=torch.randn, gd_nruns=20, gd_alpha=0.01, gd_niters=20, n_warmup=3, num_iters=20)
print(y_best) # best R^2 score
print(x_best) # best hyperparameter set
print(Ys_vec) # R^2 score history
print(Xs_vec) # hyperparameter set history

running thread  <Thread(Thread-13289, initial)>
running thread  <Thread(Thread-13290, initial)>
running thread  <Thread(Thread-13291, initial)>
running thread  <Thread(Thread-13292, initial)>
running thread  <Thread(Thread-13293, initial)>
running thread  <Thread(Thread-13294, initial)>
running thread  <Thread(Thread-13295, initial)>
running thread  <Thread(Thread-13296, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 96.85it/s]


current loss: [[46.81227667]]
running thread  <Thread(Thread-13297, initial)>
running thread  <Thread(Thread-13298, initial)>
running thread  <Thread(Thread-13299, initial)>
running thread  <Thread(Thread-13300, initial)>
running thread  <Thread(Thread-13301, initial)>
running thread  <Thread(Thread-13302, initial)>
running thread  <Thread(Thread-13303, initial)>
running thread  <Thread(Thread-13304, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 118.15it/s]


current loss: [[48.52777061]]
running thread  <Thread(Thread-13305, initial)>
running thread  <Thread(Thread-13306, initial)>
running thread  <Thread(Thread-13307, initial)>
running thread  <Thread(Thread-13308, initial)>
running thread  <Thread(Thread-13309, initial)>
running thread  <Thread(Thread-13310, initial)>
running thread  <Thread(Thread-13311, initial)>
running thread  <Thread(Thread-13312, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 108.54it/s]


current loss: [[46.23667966]]
running thread  <Thread(Thread-13313, initial)>
running thread  <Thread(Thread-13314, initial)>
running thread  <Thread(Thread-13315, initial)>
running thread  <Thread(Thread-13316, initial)>
running thread  <Thread(Thread-13317, initial)>
running thread  <Thread(Thread-13318, initial)>
running thread  <Thread(Thread-13319, initial)>
running thread  <Thread(Thread-13320, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 112.74it/s]
  print("current loss: " + str((W @ Xs - Ys) @ (W @ Xs - Ys).T + gamma * W @ W.T)) # report current loss
  print("current loss: " + str((W @ Xs - Ys) @ (W @ Xs - Ys).T + gamma * W @ W.T)) # report current loss
  error = (numpy.sum((Ys_va - Ys_pr)**2) / numpy.sum((numpy.mean(Ys_va) - Ys_va)**2)) # use 1 - R^2 as error to select hyperparameters


current loss: [[nan]]
running thread  <Thread(Thread-13321, initial)>
running thread  <Thread(Thread-13322, initial)>
running thread  <Thread(Thread-13323, initial)>
running thread  <Thread(Thread-13324, initial)>
running thread  <Thread(Thread-13325, initial)>
running thread  <Thread(Thread-13326, initial)>
running thread  <Thread(Thread-13327, initial)>
running thread  <Thread(Thread-13328, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 112.57it/s]


current loss: [[4.19915757e+108]]
running thread  <Thread(Thread-13329, initial)>
running thread  <Thread(Thread-13330, initial)>
running thread  <Thread(Thread-13331, initial)>
running thread  <Thread(Thread-13332, initial)>
running thread  <Thread(Thread-13333, initial)>
running thread  <Thread(Thread-13334, initial)>
running thread  <Thread(Thread-13335, initial)>
running thread  <Thread(Thread-13336, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 101.21it/s]


current loss: [[inf]]
running thread  <Thread(Thread-13337, initial)>
running thread  <Thread(Thread-13338, initial)>
running thread  <Thread(Thread-13339, initial)>
running thread  <Thread(Thread-13340, initial)>
running thread  <Thread(Thread-13341, initial)>
running thread  <Thread(Thread-13342, initial)>
running thread  <Thread(Thread-13343, initial)>
running thread  <Thread(Thread-13344, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 116.38it/s]


current loss: [[inf]]
running thread  <Thread(Thread-13345, initial)>
running thread  <Thread(Thread-13346, initial)>
running thread  <Thread(Thread-13347, initial)>
running thread  <Thread(Thread-13348, initial)>
running thread  <Thread(Thread-13349, initial)>
running thread  <Thread(Thread-13350, initial)>
running thread  <Thread(Thread-13351, initial)>
running thread  <Thread(Thread-13352, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 119.69it/s]


current loss: [[2.02158371e+131]]
running thread  <Thread(Thread-13353, initial)>
running thread  <Thread(Thread-13354, initial)>
running thread  <Thread(Thread-13355, initial)>
running thread  <Thread(Thread-13356, initial)>
running thread  <Thread(Thread-13357, initial)>
running thread  <Thread(Thread-13358, initial)>
running thread  <Thread(Thread-13359, initial)>
running thread  <Thread(Thread-13360, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 103.41it/s]


current loss: [[nan]]
running thread  <Thread(Thread-13361, initial)>
running thread  <Thread(Thread-13362, initial)>
running thread  <Thread(Thread-13363, initial)>
running thread  <Thread(Thread-13364, initial)>
running thread  <Thread(Thread-13365, initial)>
running thread  <Thread(Thread-13366, initial)>
running thread  <Thread(Thread-13367, initial)>
running thread  <Thread(Thread-13368, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 107.26it/s]


current loss: [[1.95649057e+291]]
running thread  <Thread(Thread-13369, initial)>
running thread  <Thread(Thread-13370, initial)>
running thread  <Thread(Thread-13371, initial)>
running thread  <Thread(Thread-13372, initial)>
running thread  <Thread(Thread-13373, initial)>
running thread  <Thread(Thread-13374, initial)>
running thread  <Thread(Thread-13375, initial)>
running thread  <Thread(Thread-13376, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 99.11it/s] 


current loss: [[4.98025134e+120]]
running thread  <Thread(Thread-13377, initial)>
running thread  <Thread(Thread-13378, initial)>
running thread  <Thread(Thread-13379, initial)>
running thread  <Thread(Thread-13380, initial)>
running thread  <Thread(Thread-13381, initial)>
running thread  <Thread(Thread-13382, initial)>
running thread  <Thread(Thread-13383, initial)>
running thread  <Thread(Thread-13384, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 105.03it/s]


current loss: [[1.65202974e+111]]
running thread  <Thread(Thread-13385, initial)>
running thread  <Thread(Thread-13386, initial)>
running thread  <Thread(Thread-13387, initial)>
running thread  <Thread(Thread-13388, initial)>
running thread  <Thread(Thread-13389, initial)>
running thread  <Thread(Thread-13390, initial)>
running thread  <Thread(Thread-13391, initial)>
running thread  <Thread(Thread-13392, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 104.17it/s]


current loss: [[3.30006067e+175]]
running thread  <Thread(Thread-13393, initial)>
running thread  <Thread(Thread-13394, initial)>
running thread  <Thread(Thread-13395, initial)>
running thread  <Thread(Thread-13396, initial)>
running thread  <Thread(Thread-13397, initial)>
running thread  <Thread(Thread-13398, initial)>
running thread  <Thread(Thread-13399, initial)>
running thread  <Thread(Thread-13400, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 103.94it/s]


current loss: [[nan]]
running thread  <Thread(Thread-13401, initial)>
running thread  <Thread(Thread-13402, initial)>
running thread  <Thread(Thread-13403, initial)>
running thread  <Thread(Thread-13404, initial)>
running thread  <Thread(Thread-13405, initial)>
running thread  <Thread(Thread-13406, initial)>
running thread  <Thread(Thread-13407, initial)>
running thread  <Thread(Thread-13408, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 112.59it/s]


current loss: [[nan]]
running thread  <Thread(Thread-13409, initial)>
running thread  <Thread(Thread-13410, initial)>
running thread  <Thread(Thread-13411, initial)>
running thread  <Thread(Thread-13412, initial)>
running thread  <Thread(Thread-13413, initial)>
running thread  <Thread(Thread-13414, initial)>
running thread  <Thread(Thread-13415, initial)>
running thread  <Thread(Thread-13416, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 101.28it/s]


current loss: [[inf]]
running thread  <Thread(Thread-13417, initial)>
running thread  <Thread(Thread-13418, initial)>
running thread  <Thread(Thread-13419, initial)>
running thread  <Thread(Thread-13420, initial)>
running thread  <Thread(Thread-13421, initial)>
running thread  <Thread(Thread-13422, initial)>
running thread  <Thread(Thread-13423, initial)>
running thread  <Thread(Thread-13424, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 114.15it/s]


current loss: [[2.12480731e+293]]
running thread  <Thread(Thread-13425, initial)>
running thread  <Thread(Thread-13426, initial)>
running thread  <Thread(Thread-13427, initial)>
running thread  <Thread(Thread-13428, initial)>
running thread  <Thread(Thread-13429, initial)>
running thread  <Thread(Thread-13430, initial)>
running thread  <Thread(Thread-13431, initial)>
running thread  <Thread(Thread-13432, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 99.38it/s]


current loss: [[1.12657782e+176]]
running thread  <Thread(Thread-13433, initial)>
running thread  <Thread(Thread-13434, initial)>
running thread  <Thread(Thread-13435, initial)>
running thread  <Thread(Thread-13436, initial)>
running thread  <Thread(Thread-13437, initial)>
running thread  <Thread(Thread-13438, initial)>
running thread  <Thread(Thread-13439, initial)>
running thread  <Thread(Thread-13440, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 111.94it/s]


current loss: [[3.57876929e+121]]
running thread  <Thread(Thread-13441, initial)>
running thread  <Thread(Thread-13442, initial)>
running thread  <Thread(Thread-13443, initial)>
running thread  <Thread(Thread-13444, initial)>
running thread  <Thread(Thread-13445, initial)>
running thread  <Thread(Thread-13446, initial)>
running thread  <Thread(Thread-13447, initial)>
running thread  <Thread(Thread-13448, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 105.25it/s]


current loss: [[3.7852154e+83]]
running thread  <Thread(Thread-13449, initial)>
running thread  <Thread(Thread-13450, initial)>
running thread  <Thread(Thread-13451, initial)>
running thread  <Thread(Thread-13452, initial)>
running thread  <Thread(Thread-13453, initial)>
running thread  <Thread(Thread-13454, initial)>
running thread  <Thread(Thread-13455, initial)>
running thread  <Thread(Thread-13456, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 105.87it/s]


current loss: [[49.73982632]]
running thread  <Thread(Thread-13457, initial)>
running thread  <Thread(Thread-13458, initial)>
running thread  <Thread(Thread-13459, initial)>
running thread  <Thread(Thread-13460, initial)>
running thread  <Thread(Thread-13461, initial)>
running thread  <Thread(Thread-13462, initial)>
running thread  <Thread(Thread-13463, initial)>
running thread  <Thread(Thread-13464, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 111.07it/s]


current loss: [[3.00215506e+171]]
running thread  <Thread(Thread-13465, initial)>
running thread  <Thread(Thread-13466, initial)>
running thread  <Thread(Thread-13467, initial)>
running thread  <Thread(Thread-13468, initial)>
running thread  <Thread(Thread-13469, initial)>
running thread  <Thread(Thread-13470, initial)>
running thread  <Thread(Thread-13471, initial)>
running thread  <Thread(Thread-13472, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 100.13it/s]


current loss: [[inf]]
running thread  <Thread(Thread-13473, initial)>
running thread  <Thread(Thread-13474, initial)>
running thread  <Thread(Thread-13475, initial)>
running thread  <Thread(Thread-13476, initial)>
running thread  <Thread(Thread-13477, initial)>
running thread  <Thread(Thread-13478, initial)>
running thread  <Thread(Thread-13479, initial)>
running thread  <Thread(Thread-13480, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 111.42it/s]


current loss: [[2.56603748e+122]]
running thread  <Thread(Thread-13481, initial)>
running thread  <Thread(Thread-13482, initial)>
running thread  <Thread(Thread-13483, initial)>
running thread  <Thread(Thread-13484, initial)>
running thread  <Thread(Thread-13485, initial)>
running thread  <Thread(Thread-13486, initial)>
running thread  <Thread(Thread-13487, initial)>
running thread  <Thread(Thread-13488, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 101.56it/s]


current loss: [[inf]]
running thread  <Thread(Thread-13489, initial)>
running thread  <Thread(Thread-13490, initial)>
running thread  <Thread(Thread-13491, initial)>
running thread  <Thread(Thread-13492, initial)>
running thread  <Thread(Thread-13493, initial)>
running thread  <Thread(Thread-13494, initial)>
running thread  <Thread(Thread-13495, initial)>
running thread  <Thread(Thread-13496, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 114.73it/s]


current loss: [[5.07074692e+93]]
running thread  <Thread(Thread-13497, initial)>
running thread  <Thread(Thread-13498, initial)>
running thread  <Thread(Thread-13499, initial)>
running thread  <Thread(Thread-13500, initial)>
running thread  <Thread(Thread-13501, initial)>
running thread  <Thread(Thread-13502, initial)>
running thread  <Thread(Thread-13503, initial)>
running thread  <Thread(Thread-13504, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 106.58it/s]


current loss: [[nan]]
running thread  <Thread(Thread-13505, initial)>
running thread  <Thread(Thread-13506, initial)>
running thread  <Thread(Thread-13507, initial)>
running thread  <Thread(Thread-13508, initial)>
running thread  <Thread(Thread-13509, initial)>
running thread  <Thread(Thread-13510, initial)>
running thread  <Thread(Thread-13511, initial)>
running thread  <Thread(Thread-13512, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 106.88it/s]


current loss: [[7.17483946e+246]]
running thread  <Thread(Thread-13513, initial)>
running thread  <Thread(Thread-13514, initial)>
running thread  <Thread(Thread-13515, initial)>
running thread  <Thread(Thread-13516, initial)>
running thread  <Thread(Thread-13517, initial)>
running thread  <Thread(Thread-13518, initial)>
running thread  <Thread(Thread-13519, initial)>
running thread  <Thread(Thread-13520, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 111.70it/s]


current loss: [[nan]]
running thread  <Thread(Thread-13521, initial)>
running thread  <Thread(Thread-13522, initial)>
running thread  <Thread(Thread-13523, initial)>
running thread  <Thread(Thread-13524, initial)>
running thread  <Thread(Thread-13525, initial)>
running thread  <Thread(Thread-13526, initial)>
running thread  <Thread(Thread-13527, initial)>
running thread  <Thread(Thread-13528, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 96.27it/s]


current loss: [[inf]]
running thread  <Thread(Thread-13529, initial)>
running thread  <Thread(Thread-13530, initial)>
running thread  <Thread(Thread-13531, initial)>
running thread  <Thread(Thread-13532, initial)>
running thread  <Thread(Thread-13533, initial)>
running thread  <Thread(Thread-13534, initial)>
running thread  <Thread(Thread-13535, initial)>
running thread  <Thread(Thread-13536, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 123.46it/s]


current loss: [[2.52966939e+211]]
running thread  <Thread(Thread-13537, initial)>
running thread  <Thread(Thread-13538, initial)>
running thread  <Thread(Thread-13539, initial)>
running thread  <Thread(Thread-13540, initial)>
running thread  <Thread(Thread-13541, initial)>
running thread  <Thread(Thread-13542, initial)>
running thread  <Thread(Thread-13543, initial)>
running thread  <Thread(Thread-13544, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 103.24it/s]


current loss: [[32334.53833229]]
running thread  <Thread(Thread-13545, initial)>
running thread  <Thread(Thread-13546, initial)>
running thread  <Thread(Thread-13547, initial)>
running thread  <Thread(Thread-13548, initial)>
running thread  <Thread(Thread-13549, initial)>
running thread  <Thread(Thread-13550, initial)>
running thread  <Thread(Thread-13551, initial)>
running thread  <Thread(Thread-13552, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 120.91it/s]


current loss: [[inf]]
running thread  <Thread(Thread-13553, initial)>
running thread  <Thread(Thread-13554, initial)>
running thread  <Thread(Thread-13555, initial)>
running thread  <Thread(Thread-13556, initial)>
running thread  <Thread(Thread-13557, initial)>
running thread  <Thread(Thread-13558, initial)>
running thread  <Thread(Thread-13559, initial)>
running thread  <Thread(Thread-13560, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 112.98it/s]


current loss: [[1.32472121e+208]]
running thread  <Thread(Thread-13561, initial)>
running thread  <Thread(Thread-13562, initial)>
running thread  <Thread(Thread-13563, initial)>
running thread  <Thread(Thread-13564, initial)>
running thread  <Thread(Thread-13565, initial)>
running thread  <Thread(Thread-13566, initial)>
running thread  <Thread(Thread-13567, initial)>
running thread  <Thread(Thread-13568, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 102.89it/s]


current loss: [[1.5456189e+09]]
running thread  <Thread(Thread-13569, initial)>
running thread  <Thread(Thread-13570, initial)>
running thread  <Thread(Thread-13571, initial)>
running thread  <Thread(Thread-13572, initial)>
running thread  <Thread(Thread-13573, initial)>
running thread  <Thread(Thread-13574, initial)>
running thread  <Thread(Thread-13575, initial)>
running thread  <Thread(Thread-13576, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 107.10it/s]


current loss: [[2.56955742e+24]]
running thread  <Thread(Thread-13577, initial)>
running thread  <Thread(Thread-13578, initial)>
running thread  <Thread(Thread-13579, initial)>
running thread  <Thread(Thread-13580, initial)>
running thread  <Thread(Thread-13581, initial)>
running thread  <Thread(Thread-13582, initial)>
running thread  <Thread(Thread-13583, initial)>
running thread  <Thread(Thread-13584, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 107.05it/s]


current loss: [[6.43403427e+59]]
running thread  <Thread(Thread-13585, initial)>
running thread  <Thread(Thread-13586, initial)>
running thread  <Thread(Thread-13587, initial)>
running thread  <Thread(Thread-13588, initial)>
running thread  <Thread(Thread-13589, initial)>
running thread  <Thread(Thread-13590, initial)>
running thread  <Thread(Thread-13591, initial)>
running thread  <Thread(Thread-13592, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 106.02it/s]


current loss: [[inf]]
running thread  <Thread(Thread-13593, initial)>
running thread  <Thread(Thread-13594, initial)>
running thread  <Thread(Thread-13595, initial)>
running thread  <Thread(Thread-13596, initial)>
running thread  <Thread(Thread-13597, initial)>
running thread  <Thread(Thread-13598, initial)>
running thread  <Thread(Thread-13599, initial)>
running thread  <Thread(Thread-13600, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 111.79it/s]


current loss: [[nan]]
running thread  <Thread(Thread-13601, initial)>
running thread  <Thread(Thread-13602, initial)>
running thread  <Thread(Thread-13603, initial)>
running thread  <Thread(Thread-13604, initial)>
running thread  <Thread(Thread-13605, initial)>
running thread  <Thread(Thread-13606, initial)>
running thread  <Thread(Thread-13607, initial)>
running thread  <Thread(Thread-13608, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 99.76it/s] 


current loss: [[inf]]
running thread  <Thread(Thread-13609, initial)>
running thread  <Thread(Thread-13610, initial)>
running thread  <Thread(Thread-13611, initial)>
running thread  <Thread(Thread-13612, initial)>
running thread  <Thread(Thread-13613, initial)>
running thread  <Thread(Thread-13614, initial)>
running thread  <Thread(Thread-13615, initial)>
running thread  <Thread(Thread-13616, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 128.87it/s]


current loss: [[inf]]
running thread  <Thread(Thread-13617, initial)>
running thread  <Thread(Thread-13618, initial)>
running thread  <Thread(Thread-13619, initial)>
running thread  <Thread(Thread-13620, initial)>
running thread  <Thread(Thread-13621, initial)>
running thread  <Thread(Thread-13622, initial)>
running thread  <Thread(Thread-13623, initial)>
running thread  <Thread(Thread-13624, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 107.08it/s]


current loss: [[inf]]
running thread  <Thread(Thread-13625, initial)>
running thread  <Thread(Thread-13626, initial)>
running thread  <Thread(Thread-13627, initial)>
running thread  <Thread(Thread-13628, initial)>
running thread  <Thread(Thread-13629, initial)>
running thread  <Thread(Thread-13630, initial)>
running thread  <Thread(Thread-13631, initial)>
running thread  <Thread(Thread-13632, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 105.53it/s]


current loss: [[5.08474822e+39]]
running thread  <Thread(Thread-13633, initial)>
running thread  <Thread(Thread-13634, initial)>
running thread  <Thread(Thread-13635, initial)>
running thread  <Thread(Thread-13636, initial)>
running thread  <Thread(Thread-13637, initial)>
running thread  <Thread(Thread-13638, initial)>
running thread  <Thread(Thread-13639, initial)>
running thread  <Thread(Thread-13640, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 102.35it/s]


current loss: [[inf]]
running thread  <Thread(Thread-13641, initial)>
running thread  <Thread(Thread-13642, initial)>
running thread  <Thread(Thread-13643, initial)>
running thread  <Thread(Thread-13644, initial)>
running thread  <Thread(Thread-13645, initial)>
running thread  <Thread(Thread-13646, initial)>
running thread  <Thread(Thread-13647, initial)>
running thread  <Thread(Thread-13648, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 106.33it/s]


current loss: [[7.22707435e+195]]
running thread  <Thread(Thread-13649, initial)>
running thread  <Thread(Thread-13650, initial)>
running thread  <Thread(Thread-13651, initial)>
running thread  <Thread(Thread-13652, initial)>
running thread  <Thread(Thread-13653, initial)>
running thread  <Thread(Thread-13654, initial)>
running thread  <Thread(Thread-13655, initial)>
running thread  <Thread(Thread-13656, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 99.93it/s] 


current loss: [[nan]]
running thread  <Thread(Thread-13657, initial)>
running thread  <Thread(Thread-13658, initial)>
running thread  <Thread(Thread-13659, initial)>
running thread  <Thread(Thread-13660, initial)>
running thread  <Thread(Thread-13661, initial)>
running thread  <Thread(Thread-13662, initial)>
running thread  <Thread(Thread-13663, initial)>
running thread  <Thread(Thread-13664, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 101.47it/s]
  Ys_pr = W @ Xs_va
  Ys_pr = W @ Xs_va


current loss: [[nan]]
running thread  <Thread(Thread-13665, initial)>
running thread  <Thread(Thread-13666, initial)>
running thread  <Thread(Thread-13667, initial)>
running thread  <Thread(Thread-13668, initial)>
running thread  <Thread(Thread-13669, initial)>
running thread  <Thread(Thread-13670, initial)>
running thread  <Thread(Thread-13671, initial)>
running thread  <Thread(Thread-13672, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


  numpy.add(multinomial_logreg_grad_i, gammaW, out=multinomial_logreg_grad_i)
  numpy.add(gradient, multinomial_logreg_grad_i, out=gradient)
  numpy.add(gradient, multinomial_logreg_grad_i, out=gradient)
  numpy.multiply(gradient, 0, out=gradient)
100%|██████████| 40/40 [00:00<00:00, 134.65it/s]


current loss: [[nan]]
running thread  <Thread(Thread-13673, initial)>
running thread  <Thread(Thread-13674, initial)>
running thread  <Thread(Thread-13675, initial)>
running thread  <Thread(Thread-13676, initial)>
running thread  <Thread(Thread-13677, initial)>
running thread  <Thread(Thread-13678, initial)>
running thread  <Thread(Thread-13679, initial)>
running thread  <Thread(Thread-13680, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 99.02it/s] 


current loss: [[1.00645659e+51]]
running thread  <Thread(Thread-13681, initial)>
running thread  <Thread(Thread-13682, initial)>
running thread  <Thread(Thread-13683, initial)>
running thread  <Thread(Thread-13684, initial)>
running thread  <Thread(Thread-13685, initial)>
running thread  <Thread(Thread-13686, initial)>
running thread  <Thread(Thread-13687, initial)>
running thread  <Thread(Thread-13688, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 109.78it/s]


current loss: [[2.56264984e+206]]
running thread  <Thread(Thread-13689, initial)>
running thread  <Thread(Thread-13690, initial)>
running thread  <Thread(Thread-13691, initial)>
running thread  <Thread(Thread-13692, initial)>
running thread  <Thread(Thread-13693, initial)>
running thread  <Thread(Thread-13694, initial)>
running thread  <Thread(Thread-13695, initial)>
running thread  <Thread(Thread-13696, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 102.97it/s]


current loss: [[inf]]
running thread  <Thread(Thread-13697, initial)>
running thread  <Thread(Thread-13698, initial)>
running thread  <Thread(Thread-13699, initial)>
running thread  <Thread(Thread-13700, initial)>
running thread  <Thread(Thread-13701, initial)>
running thread  <Thread(Thread-13702, initial)>
running thread  <Thread(Thread-13703, initial)>
running thread  <Thread(Thread-13704, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 111.08it/s]


current loss: [[7.30016428e+168]]
running thread  <Thread(Thread-13705, initial)>
running thread  <Thread(Thread-13706, initial)>
running thread  <Thread(Thread-13707, initial)>
running thread  <Thread(Thread-13708, initial)>
running thread  <Thread(Thread-13709, initial)>
running thread  <Thread(Thread-13710, initial)>
running thread  <Thread(Thread-13711, initial)>
running thread  <Thread(Thread-13712, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 112.67it/s]


current loss: [[1.45856347e+280]]
running thread  <Thread(Thread-13713, initial)>
running thread  <Thread(Thread-13714, initial)>
running thread  <Thread(Thread-13715, initial)>
running thread  <Thread(Thread-13716, initial)>
running thread  <Thread(Thread-13717, initial)>
running thread  <Thread(Thread-13718, initial)>
running thread  <Thread(Thread-13719, initial)>
running thread  <Thread(Thread-13720, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 112.53it/s]


current loss: [[nan]]
running thread  <Thread(Thread-13721, initial)>
running thread  <Thread(Thread-13722, initial)>
running thread  <Thread(Thread-13723, initial)>
running thread  <Thread(Thread-13724, initial)>
running thread  <Thread(Thread-13725, initial)>
running thread  <Thread(Thread-13726, initial)>
running thread  <Thread(Thread-13727, initial)>
running thread  <Thread(Thread-13728, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 108.47it/s]


current loss: [[inf]]
running thread  <Thread(Thread-13729, initial)>
running thread  <Thread(Thread-13730, initial)>
running thread  <Thread(Thread-13731, initial)>
running thread  <Thread(Thread-13732, initial)>
running thread  <Thread(Thread-13733, initial)>
running thread  <Thread(Thread-13734, initial)>
running thread  <Thread(Thread-13735, initial)>
running thread  <Thread(Thread-13736, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 101.47it/s]


current loss: [[9.36092991e+82]]
running thread  <Thread(Thread-13737, initial)>
running thread  <Thread(Thread-13738, initial)>
running thread  <Thread(Thread-13739, initial)>
running thread  <Thread(Thread-13740, initial)>
running thread  <Thread(Thread-13741, initial)>
running thread  <Thread(Thread-13742, initial)>
running thread  <Thread(Thread-13743, initial)>
running thread  <Thread(Thread-13744, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 108.91it/s]


current loss: [[inf]]
running thread  <Thread(Thread-13745, initial)>
running thread  <Thread(Thread-13746, initial)>
running thread  <Thread(Thread-13747, initial)>
running thread  <Thread(Thread-13748, initial)>
running thread  <Thread(Thread-13749, initial)>
running thread  <Thread(Thread-13750, initial)>
running thread  <Thread(Thread-13751, initial)>
running thread  <Thread(Thread-13752, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 104.23it/s]


current loss: [[6.99632209e+211]]
running thread  <Thread(Thread-13753, initial)>
running thread  <Thread(Thread-13754, initial)>
running thread  <Thread(Thread-13755, initial)>
running thread  <Thread(Thread-13756, initial)>
running thread  <Thread(Thread-13757, initial)>
running thread  <Thread(Thread-13758, initial)>
running thread  <Thread(Thread-13759, initial)>
running thread  <Thread(Thread-13760, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 91.94it/s]


current loss: [[inf]]
running thread  <Thread(Thread-13761, initial)>
running thread  <Thread(Thread-13762, initial)>
running thread  <Thread(Thread-13763, initial)>
running thread  <Thread(Thread-13764, initial)>
running thread  <Thread(Thread-13765, initial)>
running thread  <Thread(Thread-13766, initial)>
running thread  <Thread(Thread-13767, initial)>
running thread  <Thread(Thread-13768, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 106.35it/s]


current loss: [[inf]]
running thread  <Thread(Thread-13769, initial)>
running thread  <Thread(Thread-13770, initial)>
running thread  <Thread(Thread-13771, initial)>
running thread  <Thread(Thread-13772, initial)>
running thread  <Thread(Thread-13773, initial)>
running thread  <Thread(Thread-13774, initial)>
running thread  <Thread(Thread-13775, initial)>
running thread  <Thread(Thread-13776, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 97.46it/s]


current loss: [[1.02848036e+208]]
running thread  <Thread(Thread-13777, initial)>
running thread  <Thread(Thread-13778, initial)>
running thread  <Thread(Thread-13779, initial)>
running thread  <Thread(Thread-13780, initial)>
running thread  <Thread(Thread-13781, initial)>
running thread  <Thread(Thread-13782, initial)>
running thread  <Thread(Thread-13783, initial)>
running thread  <Thread(Thread-13784, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 128.23it/s]


current loss: [[2.22209368e+60]]
running thread  <Thread(Thread-13785, initial)>
running thread  <Thread(Thread-13786, initial)>
running thread  <Thread(Thread-13787, initial)>
running thread  <Thread(Thread-13788, initial)>
running thread  <Thread(Thread-13789, initial)>
running thread  <Thread(Thread-13790, initial)>
running thread  <Thread(Thread-13791, initial)>
running thread  <Thread(Thread-13792, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 94.83it/s]


current loss: [[34089234.7332238]]
running thread  <Thread(Thread-13793, initial)>
running thread  <Thread(Thread-13794, initial)>
running thread  <Thread(Thread-13795, initial)>
running thread  <Thread(Thread-13796, initial)>
running thread  <Thread(Thread-13797, initial)>
running thread  <Thread(Thread-13798, initial)>
running thread  <Thread(Thread-13799, initial)>
running thread  <Thread(Thread-13800, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 106.55it/s]


current loss: [[2.78880769e+233]]
running thread  <Thread(Thread-13801, initial)>
running thread  <Thread(Thread-13802, initial)>
running thread  <Thread(Thread-13803, initial)>
running thread  <Thread(Thread-13804, initial)>
running thread  <Thread(Thread-13805, initial)>
running thread  <Thread(Thread-13806, initial)>
running thread  <Thread(Thread-13807, initial)>
running thread  <Thread(Thread-13808, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 101.10it/s]


current loss: [[9.58128032e+183]]
running thread  <Thread(Thread-13809, initial)>
running thread  <Thread(Thread-13810, initial)>
running thread  <Thread(Thread-13811, initial)>
running thread  <Thread(Thread-13812, initial)>
running thread  <Thread(Thread-13813, initial)>
running thread  <Thread(Thread-13814, initial)>
running thread  <Thread(Thread-13815, initial)>
running thread  <Thread(Thread-13816, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 99.04it/s]


current loss: [[inf]]
running thread  <Thread(Thread-13817, initial)>
running thread  <Thread(Thread-13818, initial)>
running thread  <Thread(Thread-13819, initial)>
running thread  <Thread(Thread-13820, initial)>
running thread  <Thread(Thread-13821, initial)>
running thread  <Thread(Thread-13822, initial)>
running thread  <Thread(Thread-13823, initial)>
running thread  <Thread(Thread-13824, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 105.06it/s]


current loss: [[nan]]
running thread  <Thread(Thread-13825, initial)>
running thread  <Thread(Thread-13826, initial)>
running thread  <Thread(Thread-13827, initial)>
running thread  <Thread(Thread-13828, initial)>
running thread  <Thread(Thread-13829, initial)>
running thread  <Thread(Thread-13830, initial)>
running thread  <Thread(Thread-13831, initial)>
running thread  <Thread(Thread-13832, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 101.45it/s]


current loss: [[inf]]
running thread  <Thread(Thread-13833, initial)>
running thread  <Thread(Thread-13834, initial)>
running thread  <Thread(Thread-13835, initial)>
running thread  <Thread(Thread-13836, initial)>
running thread  <Thread(Thread-13837, initial)>
running thread  <Thread(Thread-13838, initial)>
running thread  <Thread(Thread-13839, initial)>
running thread  <Thread(Thread-13840, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 113.13it/s]


current loss: [[nan]]
running thread  <Thread(Thread-13841, initial)>
running thread  <Thread(Thread-13842, initial)>
running thread  <Thread(Thread-13843, initial)>
running thread  <Thread(Thread-13844, initial)>
running thread  <Thread(Thread-13845, initial)>
running thread  <Thread(Thread-13846, initial)>
running thread  <Thread(Thread-13847, initial)>
running thread  <Thread(Thread-13848, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 106.82it/s]


current loss: [[2.18456266e+224]]
running thread  <Thread(Thread-13849, initial)>
running thread  <Thread(Thread-13850, initial)>
running thread  <Thread(Thread-13851, initial)>
running thread  <Thread(Thread-13852, initial)>
running thread  <Thread(Thread-13853, initial)>
running thread  <Thread(Thread-13854, initial)>
running thread  <Thread(Thread-13855, initial)>
running thread  <Thread(Thread-13856, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 103.95it/s]


current loss: [[5.09351623e+304]]
running thread  <Thread(Thread-13857, initial)>
running thread  <Thread(Thread-13858, initial)>
running thread  <Thread(Thread-13859, initial)>
running thread  <Thread(Thread-13860, initial)>
running thread  <Thread(Thread-13861, initial)>
running thread  <Thread(Thread-13862, initial)>
running thread  <Thread(Thread-13863, initial)>
running thread  <Thread(Thread-13864, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 107.54it/s]


current loss: [[371196.84636616]]
running thread  <Thread(Thread-13865, initial)>
running thread  <Thread(Thread-13866, initial)>
running thread  <Thread(Thread-13867, initial)>
running thread  <Thread(Thread-13868, initial)>
running thread  <Thread(Thread-13869, initial)>
running thread  <Thread(Thread-13870, initial)>
running thread  <Thread(Thread-13871, initial)>
running thread  <Thread(Thread-13872, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 107.17it/s]


current loss: [[392.09875496]]
running thread  <Thread(Thread-13873, initial)>
running thread  <Thread(Thread-13874, initial)>
running thread  <Thread(Thread-13875, initial)>
running thread  <Thread(Thread-13876, initial)>
running thread  <Thread(Thread-13877, initial)>
running thread  <Thread(Thread-13878, initial)>
running thread  <Thread(Thread-13879, initial)>
running thread  <Thread(Thread-13880, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 127.41it/s]


current loss: [[3.63048817e+169]]
running thread  <Thread(Thread-13881, initial)>
running thread  <Thread(Thread-13882, initial)>
running thread  <Thread(Thread-13883, initial)>
running thread  <Thread(Thread-13884, initial)>
running thread  <Thread(Thread-13885, initial)>
running thread  <Thread(Thread-13886, initial)>
running thread  <Thread(Thread-13887, initial)>
running thread  <Thread(Thread-13888, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 104.73it/s]


current loss: [[9.32304706e+296]]
running thread  <Thread(Thread-13889, initial)>
running thread  <Thread(Thread-13890, initial)>
running thread  <Thread(Thread-13891, initial)>
running thread  <Thread(Thread-13892, initial)>
running thread  <Thread(Thread-13893, initial)>
running thread  <Thread(Thread-13894, initial)>
running thread  <Thread(Thread-13895, initial)>
running thread  <Thread(Thread-13896, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 116.30it/s]


current loss: [[1.44456187e+104]]
running thread  <Thread(Thread-13897, initial)>
running thread  <Thread(Thread-13898, initial)>
running thread  <Thread(Thread-13899, initial)>
running thread  <Thread(Thread-13900, initial)>
running thread  <Thread(Thread-13901, initial)>
running thread  <Thread(Thread-13902, initial)>
running thread  <Thread(Thread-13903, initial)>
running thread  <Thread(Thread-13904, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 113.28it/s]


current loss: [[2.54196191e+163]]
running thread  <Thread(Thread-13905, initial)>
running thread  <Thread(Thread-13906, initial)>
running thread  <Thread(Thread-13907, initial)>
running thread  <Thread(Thread-13908, initial)>
running thread  <Thread(Thread-13909, initial)>
running thread  <Thread(Thread-13910, initial)>
running thread  <Thread(Thread-13911, initial)>
running thread  <Thread(Thread-13912, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 102.12it/s]


current loss: [[2.83356892e+83]]
running thread  <Thread(Thread-13913, initial)>
running thread  <Thread(Thread-13914, initial)>
running thread  <Thread(Thread-13915, initial)>
running thread  <Thread(Thread-13916, initial)>
running thread  <Thread(Thread-13917, initial)>
running thread  <Thread(Thread-13918, initial)>
running thread  <Thread(Thread-13919, initial)>
running thread  <Thread(Thread-13920, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 126.08it/s]


current loss: [[186006.7515509]]
running thread  <Thread(Thread-13921, initial)>
running thread  <Thread(Thread-13922, initial)>
running thread  <Thread(Thread-13923, initial)>
running thread  <Thread(Thread-13924, initial)>
running thread  <Thread(Thread-13925, initial)>
running thread  <Thread(Thread-13926, initial)>
running thread  <Thread(Thread-13927, initial)>
running thread  <Thread(Thread-13928, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 96.59it/s]


current loss: [[inf]]
running thread  <Thread(Thread-13929, initial)>
running thread  <Thread(Thread-13930, initial)>
running thread  <Thread(Thread-13931, initial)>
running thread  <Thread(Thread-13932, initial)>
running thread  <Thread(Thread-13933, initial)>
running thread  <Thread(Thread-13934, initial)>
running thread  <Thread(Thread-13935, initial)>
running thread  <Thread(Thread-13936, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 128.47it/s]


current loss: [[1.63470992e+162]]
running thread  <Thread(Thread-13937, initial)>
running thread  <Thread(Thread-13938, initial)>
running thread  <Thread(Thread-13939, initial)>
running thread  <Thread(Thread-13940, initial)>
running thread  <Thread(Thread-13941, initial)>
running thread  <Thread(Thread-13942, initial)>
running thread  <Thread(Thread-13943, initial)>
running thread  <Thread(Thread-13944, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 109.68it/s]


current loss: [[9.69922678e+302]]
running thread  <Thread(Thread-13945, initial)>
running thread  <Thread(Thread-13946, initial)>
running thread  <Thread(Thread-13947, initial)>
running thread  <Thread(Thread-13948, initial)>
running thread  <Thread(Thread-13949, initial)>
running thread  <Thread(Thread-13950, initial)>
running thread  <Thread(Thread-13951, initial)>
running thread  <Thread(Thread-13952, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 109.05it/s]


current loss: [[inf]]
running thread  <Thread(Thread-13953, initial)>
running thread  <Thread(Thread-13954, initial)>
running thread  <Thread(Thread-13955, initial)>
running thread  <Thread(Thread-13956, initial)>
running thread  <Thread(Thread-13957, initial)>
running thread  <Thread(Thread-13958, initial)>
running thread  <Thread(Thread-13959, initial)>
running thread  <Thread(Thread-13960, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 118.40it/s]


current loss: [[1.28657559e+123]]
running thread  <Thread(Thread-13961, initial)>
running thread  <Thread(Thread-13962, initial)>
running thread  <Thread(Thread-13963, initial)>
running thread  <Thread(Thread-13964, initial)>
running thread  <Thread(Thread-13965, initial)>
running thread  <Thread(Thread-13966, initial)>
running thread  <Thread(Thread-13967, initial)>
running thread  <Thread(Thread-13968, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 134.97it/s]


current loss: [[5.98335006e+88]]
running thread  <Thread(Thread-13969, initial)>
running thread  <Thread(Thread-13970, initial)>
running thread  <Thread(Thread-13971, initial)>
running thread  <Thread(Thread-13972, initial)>
running thread  <Thread(Thread-13973, initial)>
running thread  <Thread(Thread-13974, initial)>
running thread  <Thread(Thread-13975, initial)>
running thread  <Thread(Thread-13976, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 143.59it/s]


current loss: [[nan]]
running thread  <Thread(Thread-13977, initial)>
running thread  <Thread(Thread-13978, initial)>
running thread  <Thread(Thread-13979, initial)>
running thread  <Thread(Thread-13980, initial)>
running thread  <Thread(Thread-13981, initial)>
running thread  <Thread(Thread-13982, initial)>
running thread  <Thread(Thread-13983, initial)>
running thread  <Thread(Thread-13984, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 152.17it/s]


current loss: [[2.07348817e+41]]
running thread  <Thread(Thread-13985, initial)>
running thread  <Thread(Thread-13986, initial)>
running thread  <Thread(Thread-13987, initial)>
running thread  <Thread(Thread-13988, initial)>
running thread  <Thread(Thread-13989, initial)>
running thread  <Thread(Thread-13990, initial)>
running thread  <Thread(Thread-13991, initial)>
running thread  <Thread(Thread-13992, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 159.22it/s]


current loss: [[nan]]
running thread  <Thread(Thread-13993, initial)>
running thread  <Thread(Thread-13994, initial)>
running thread  <Thread(Thread-13995, initial)>
running thread  <Thread(Thread-13996, initial)>
running thread  <Thread(Thread-13997, initial)>
running thread  <Thread(Thread-13998, initial)>
running thread  <Thread(Thread-13999, initial)>
running thread  <Thread(Thread-14000, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 138.07it/s]


current loss: [[1.785726e+33]]
running thread  <Thread(Thread-14001, initial)>
running thread  <Thread(Thread-14002, initial)>
running thread  <Thread(Thread-14003, initial)>
running thread  <Thread(Thread-14004, initial)>
running thread  <Thread(Thread-14005, initial)>
running thread  <Thread(Thread-14006, initial)>
running thread  <Thread(Thread-14007, initial)>
running thread  <Thread(Thread-14008, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 133.76it/s]


current loss: [[nan]]
running thread  <Thread(Thread-14009, initial)>
running thread  <Thread(Thread-14010, initial)>
running thread  <Thread(Thread-14011, initial)>
running thread  <Thread(Thread-14012, initial)>
running thread  <Thread(Thread-14013, initial)>
running thread  <Thread(Thread-14014, initial)>
running thread  <Thread(Thread-14015, initial)>
running thread  <Thread(Thread-14016, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


  numpy.add(multinomial_logreg_grad_i, gammaW, out=multinomial_logreg_grad_i)
100%|██████████| 40/40 [00:00<00:00, 144.44it/s]


current loss: [[nan]]
running thread  <Thread(Thread-14017, initial)>
running thread  <Thread(Thread-14018, initial)>
running thread  <Thread(Thread-14019, initial)>
running thread  <Thread(Thread-14020, initial)>
running thread  <Thread(Thread-14021, initial)>
running thread  <Thread(Thread-14022, initial)>
running thread  <Thread(Thread-14023, initial)>
running thread  <Thread(Thread-14024, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 146.67it/s]


current loss: [[nan]]
running thread  <Thread(Thread-14025, initial)>
running thread  <Thread(Thread-14026, initial)>
running thread  <Thread(Thread-14027, initial)>
running thread  <Thread(Thread-14028, initial)>
running thread  <Thread(Thread-14029, initial)>
running thread  <Thread(Thread-14030, initial)>
running thread  <Thread(Thread-14031, initial)>
running thread  <Thread(Thread-14032, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 136.71it/s]


current loss: [[3.78323104e+206]]
running thread  <Thread(Thread-14033, initial)>
running thread  <Thread(Thread-14034, initial)>
running thread  <Thread(Thread-14035, initial)>
running thread  <Thread(Thread-14036, initial)>
running thread  <Thread(Thread-14037, initial)>
running thread  <Thread(Thread-14038, initial)>
running thread  <Thread(Thread-14039, initial)>
running thread  <Thread(Thread-14040, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 155.33it/s]


current loss: [[inf]]
running thread  <Thread(Thread-14041, initial)>
running thread  <Thread(Thread-14042, initial)>
running thread  <Thread(Thread-14043, initial)>
running thread  <Thread(Thread-14044, initial)>
running thread  <Thread(Thread-14045, initial)>
running thread  <Thread(Thread-14046, initial)>
running thread  <Thread(Thread-14047, initial)>
running thread  <Thread(Thread-14048, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 160.24it/s]


current loss: [[inf]]
running thread  <Thread(Thread-14049, initial)>
running thread  <Thread(Thread-14050, initial)>
running thread  <Thread(Thread-14051, initial)>
running thread  <Thread(Thread-14052, initial)>
running thread  <Thread(Thread-14053, initial)>
running thread  <Thread(Thread-14054, initial)>
running thread  <Thread(Thread-14055, initial)>
running thread  <Thread(Thread-14056, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 134.19it/s]


current loss: [[nan]]
running thread  <Thread(Thread-14057, initial)>
running thread  <Thread(Thread-14058, initial)>
running thread  <Thread(Thread-14059, initial)>
running thread  <Thread(Thread-14060, initial)>
running thread  <Thread(Thread-14061, initial)>
running thread  <Thread(Thread-14062, initial)>
running thread  <Thread(Thread-14063, initial)>
running thread  <Thread(Thread-14064, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 135.36it/s]


current loss: [[2.63599985e+298]]
running thread  <Thread(Thread-14065, initial)>
running thread  <Thread(Thread-14066, initial)>
running thread  <Thread(Thread-14067, initial)>
running thread  <Thread(Thread-14068, initial)>
running thread  <Thread(Thread-14069, initial)>
running thread  <Thread(Thread-14070, initial)>
running thread  <Thread(Thread-14071, initial)>
running thread  <Thread(Thread-14072, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 104.87it/s]


current loss: [[1.41681204e+206]]
running thread  <Thread(Thread-14073, initial)>
running thread  <Thread(Thread-14074, initial)>
running thread  <Thread(Thread-14075, initial)>
running thread  <Thread(Thread-14076, initial)>
running thread  <Thread(Thread-14077, initial)>
running thread  <Thread(Thread-14078, initial)>
running thread  <Thread(Thread-14079, initial)>
running thread  <Thread(Thread-14080, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 117.17it/s]


current loss: [[inf]]
running thread  <Thread(Thread-14081, initial)>
running thread  <Thread(Thread-14082, initial)>
running thread  <Thread(Thread-14083, initial)>
running thread  <Thread(Thread-14084, initial)>
running thread  <Thread(Thread-14085, initial)>
running thread  <Thread(Thread-14086, initial)>
running thread  <Thread(Thread-14087, initial)>
running thread  <Thread(Thread-14088, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 109.78it/s]


current loss: [[inf]]
running thread  <Thread(Thread-14089, initial)>
running thread  <Thread(Thread-14090, initial)>
running thread  <Thread(Thread-14091, initial)>
running thread  <Thread(Thread-14092, initial)>
running thread  <Thread(Thread-14093, initial)>
running thread  <Thread(Thread-14094, initial)>
running thread  <Thread(Thread-14095, initial)>
running thread  <Thread(Thread-14096, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 115.24it/s]


current loss: [[2.0695895e+133]]
running thread  <Thread(Thread-14097, initial)>
running thread  <Thread(Thread-14098, initial)>
running thread  <Thread(Thread-14099, initial)>
running thread  <Thread(Thread-14100, initial)>
running thread  <Thread(Thread-14101, initial)>
running thread  <Thread(Thread-14102, initial)>
running thread  <Thread(Thread-14103, initial)>
running thread  <Thread(Thread-14104, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 103.27it/s]


current loss: [[inf]]
running thread  <Thread(Thread-14105, initial)>
running thread  <Thread(Thread-14106, initial)>
running thread  <Thread(Thread-14107, initial)>
running thread  <Thread(Thread-14108, initial)>
running thread  <Thread(Thread-14109, initial)>
running thread  <Thread(Thread-14110, initial)>
running thread  <Thread(Thread-14111, initial)>
running thread  <Thread(Thread-14112, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 104.88it/s]


current loss: [[1.16322867e+59]]
running thread  <Thread(Thread-14113, initial)>
running thread  <Thread(Thread-14114, initial)>
running thread  <Thread(Thread-14115, initial)>
running thread  <Thread(Thread-14116, initial)>
running thread  <Thread(Thread-14117, initial)>
running thread  <Thread(Thread-14118, initial)>
running thread  <Thread(Thread-14119, initial)>
running thread  <Thread(Thread-14120, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 108.55it/s]


current loss: [[4.24961139e+110]]
running thread  <Thread(Thread-14121, initial)>
running thread  <Thread(Thread-14122, initial)>
running thread  <Thread(Thread-14123, initial)>
running thread  <Thread(Thread-14124, initial)>
running thread  <Thread(Thread-14125, initial)>
running thread  <Thread(Thread-14126, initial)>
running thread  <Thread(Thread-14127, initial)>
running thread  <Thread(Thread-14128, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 99.66it/s]


current loss: [[inf]]
running thread  <Thread(Thread-14129, initial)>
running thread  <Thread(Thread-14130, initial)>
running thread  <Thread(Thread-14131, initial)>
running thread  <Thread(Thread-14132, initial)>
running thread  <Thread(Thread-14133, initial)>
running thread  <Thread(Thread-14134, initial)>
running thread  <Thread(Thread-14135, initial)>
running thread  <Thread(Thread-14136, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 101.78it/s]


current loss: [[inf]]
running thread  <Thread(Thread-14137, initial)>
running thread  <Thread(Thread-14138, initial)>
running thread  <Thread(Thread-14139, initial)>
running thread  <Thread(Thread-14140, initial)>
running thread  <Thread(Thread-14141, initial)>
running thread  <Thread(Thread-14142, initial)>
running thread  <Thread(Thread-14143, initial)>
running thread  <Thread(Thread-14144, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 108.27it/s]


current loss: [[nan]]
running thread  <Thread(Thread-14145, initial)>
running thread  <Thread(Thread-14146, initial)>
running thread  <Thread(Thread-14147, initial)>
running thread  <Thread(Thread-14148, initial)>
running thread  <Thread(Thread-14149, initial)>
running thread  <Thread(Thread-14150, initial)>
running thread  <Thread(Thread-14151, initial)>
running thread  <Thread(Thread-14152, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 111.61it/s]


current loss: [[35.4606125]]
running thread  <Thread(Thread-14153, initial)>
running thread  <Thread(Thread-14154, initial)>
running thread  <Thread(Thread-14155, initial)>
running thread  <Thread(Thread-14156, initial)>
running thread  <Thread(Thread-14157, initial)>
running thread  <Thread(Thread-14158, initial)>
running thread  <Thread(Thread-14159, initial)>
running thread  <Thread(Thread-14160, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 112.66it/s]


current loss: [[1.30074695e+98]]
running thread  <Thread(Thread-14161, initial)>
running thread  <Thread(Thread-14162, initial)>
running thread  <Thread(Thread-14163, initial)>
running thread  <Thread(Thread-14164, initial)>
running thread  <Thread(Thread-14165, initial)>
running thread  <Thread(Thread-14166, initial)>
running thread  <Thread(Thread-14167, initial)>
running thread  <Thread(Thread-14168, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 114.52it/s]


current loss: [[nan]]
running thread  <Thread(Thread-14169, initial)>
running thread  <Thread(Thread-14170, initial)>
running thread  <Thread(Thread-14171, initial)>
running thread  <Thread(Thread-14172, initial)>
running thread  <Thread(Thread-14173, initial)>
running thread  <Thread(Thread-14174, initial)>
running thread  <Thread(Thread-14175, initial)>
running thread  <Thread(Thread-14176, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 97.35it/s]


current loss: [[nan]]
running thread  <Thread(Thread-14177, initial)>
running thread  <Thread(Thread-14178, initial)>
running thread  <Thread(Thread-14179, initial)>
running thread  <Thread(Thread-14180, initial)>
running thread  <Thread(Thread-14181, initial)>
running thread  <Thread(Thread-14182, initial)>
running thread  <Thread(Thread-14183, initial)>
running thread  <Thread(Thread-14184, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 100.31it/s]


current loss: [[nan]]
running thread  <Thread(Thread-14185, initial)>
running thread  <Thread(Thread-14186, initial)>
running thread  <Thread(Thread-14187, initial)>
running thread  <Thread(Thread-14188, initial)>
running thread  <Thread(Thread-14189, initial)>
running thread  <Thread(Thread-14190, initial)>
running thread  <Thread(Thread-14191, initial)>
running thread  <Thread(Thread-14192, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 104.77it/s]


current loss: [[47.57121436]]
running thread  <Thread(Thread-14193, initial)>
running thread  <Thread(Thread-14194, initial)>
running thread  <Thread(Thread-14195, initial)>
running thread  <Thread(Thread-14196, initial)>
running thread  <Thread(Thread-14197, initial)>
running thread  <Thread(Thread-14198, initial)>
running thread  <Thread(Thread-14199, initial)>
running thread  <Thread(Thread-14200, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 101.08it/s]


current loss: [[inf]]
running thread  <Thread(Thread-14201, initial)>
running thread  <Thread(Thread-14202, initial)>
running thread  <Thread(Thread-14203, initial)>
running thread  <Thread(Thread-14204, initial)>
running thread  <Thread(Thread-14205, initial)>
running thread  <Thread(Thread-14206, initial)>
running thread  <Thread(Thread-14207, initial)>
running thread  <Thread(Thread-14208, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 110.03it/s]


current loss: [[inf]]
running thread  <Thread(Thread-14209, initial)>
running thread  <Thread(Thread-14210, initial)>
running thread  <Thread(Thread-14211, initial)>
running thread  <Thread(Thread-14212, initial)>
running thread  <Thread(Thread-14213, initial)>
running thread  <Thread(Thread-14214, initial)>
running thread  <Thread(Thread-14215, initial)>
running thread  <Thread(Thread-14216, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 100.92it/s]


current loss: [[nan]]
running thread  <Thread(Thread-14217, initial)>
running thread  <Thread(Thread-14218, initial)>
running thread  <Thread(Thread-14219, initial)>
running thread  <Thread(Thread-14220, initial)>
running thread  <Thread(Thread-14221, initial)>
running thread  <Thread(Thread-14222, initial)>
running thread  <Thread(Thread-14223, initial)>
running thread  <Thread(Thread-14224, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 103.28it/s]


current loss: [[nan]]
running thread  <Thread(Thread-14225, initial)>
running thread  <Thread(Thread-14226, initial)>
running thread  <Thread(Thread-14227, initial)>
running thread  <Thread(Thread-14228, initial)>
running thread  <Thread(Thread-14229, initial)>
running thread  <Thread(Thread-14230, initial)>
running thread  <Thread(Thread-14231, initial)>
running thread  <Thread(Thread-14232, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 108.90it/s]


current loss: [[2.80201766e+230]]
running thread  <Thread(Thread-14233, initial)>
running thread  <Thread(Thread-14234, initial)>
running thread  <Thread(Thread-14235, initial)>
running thread  <Thread(Thread-14236, initial)>
running thread  <Thread(Thread-14237, initial)>
running thread  <Thread(Thread-14238, initial)>
running thread  <Thread(Thread-14239, initial)>
running thread  <Thread(Thread-14240, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 104.76it/s]


current loss: [[inf]]
running thread  <Thread(Thread-14241, initial)>
running thread  <Thread(Thread-14242, initial)>
running thread  <Thread(Thread-14243, initial)>
running thread  <Thread(Thread-14244, initial)>
running thread  <Thread(Thread-14245, initial)>
running thread  <Thread(Thread-14246, initial)>
running thread  <Thread(Thread-14247, initial)>
running thread  <Thread(Thread-14248, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 98.69it/s]


current loss: [[2.6339386e+254]]
running thread  <Thread(Thread-14249, initial)>
running thread  <Thread(Thread-14250, initial)>
running thread  <Thread(Thread-14251, initial)>
running thread  <Thread(Thread-14252, initial)>
running thread  <Thread(Thread-14253, initial)>
running thread  <Thread(Thread-14254, initial)>
running thread  <Thread(Thread-14255, initial)>
running thread  <Thread(Thread-14256, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 100.69it/s]


current loss: [[inf]]
running thread  <Thread(Thread-14257, initial)>
running thread  <Thread(Thread-14258, initial)>
running thread  <Thread(Thread-14259, initial)>
running thread  <Thread(Thread-14260, initial)>
running thread  <Thread(Thread-14261, initial)>
running thread  <Thread(Thread-14262, initial)>
running thread  <Thread(Thread-14263, initial)>
running thread  <Thread(Thread-14264, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 107.92it/s]


current loss: [[1.41717607e+238]]
running thread  <Thread(Thread-14265, initial)>
running thread  <Thread(Thread-14266, initial)>
running thread  <Thread(Thread-14267, initial)>
running thread  <Thread(Thread-14268, initial)>
running thread  <Thread(Thread-14269, initial)>
running thread  <Thread(Thread-14270, initial)>
running thread  <Thread(Thread-14271, initial)>
running thread  <Thread(Thread-14272, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 102.27it/s]


current loss: [[1.62733872e+117]]
running thread  <Thread(Thread-14273, initial)>
running thread  <Thread(Thread-14274, initial)>
running thread  <Thread(Thread-14275, initial)>
running thread  <Thread(Thread-14276, initial)>
running thread  <Thread(Thread-14277, initial)>
running thread  <Thread(Thread-14278, initial)>
running thread  <Thread(Thread-14279, initial)>
running thread  <Thread(Thread-14280, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 108.04it/s]


current loss: [[nan]]
running thread  <Thread(Thread-14281, initial)>
running thread  <Thread(Thread-14282, initial)>
running thread  <Thread(Thread-14283, initial)>
running thread  <Thread(Thread-14284, initial)>
running thread  <Thread(Thread-14285, initial)>
running thread  <Thread(Thread-14286, initial)>
running thread  <Thread(Thread-14287, initial)>
running thread  <Thread(Thread-14288, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 108.05it/s]


current loss: [[nan]]
running thread  <Thread(Thread-14289, initial)>
running thread  <Thread(Thread-14290, initial)>
running thread  <Thread(Thread-14291, initial)>
running thread  <Thread(Thread-14292, initial)>
running thread  <Thread(Thread-14293, initial)>
running thread  <Thread(Thread-14294, initial)>
running thread  <Thread(Thread-14295, initial)>
running thread  <Thread(Thread-14296, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 121.43it/s]


current loss: [[inf]]
running thread  <Thread(Thread-14297, initial)>
running thread  <Thread(Thread-14298, initial)>
running thread  <Thread(Thread-14299, initial)>
running thread  <Thread(Thread-14300, initial)>
running thread  <Thread(Thread-14301, initial)>
running thread  <Thread(Thread-14302, initial)>
running thread  <Thread(Thread-14303, initial)>
running thread  <Thread(Thread-14304, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 111.22it/s]


current loss: [[nan]]
running thread  <Thread(Thread-14305, initial)>
running thread  <Thread(Thread-14306, initial)>
running thread  <Thread(Thread-14307, initial)>
running thread  <Thread(Thread-14308, initial)>
running thread  <Thread(Thread-14309, initial)>
running thread  <Thread(Thread-14310, initial)>
running thread  <Thread(Thread-14311, initial)>
running thread  <Thread(Thread-14312, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 97.33it/s]


current loss: [[nan]]
running thread  <Thread(Thread-14313, initial)>
running thread  <Thread(Thread-14314, initial)>
running thread  <Thread(Thread-14315, initial)>
running thread  <Thread(Thread-14316, initial)>
running thread  <Thread(Thread-14317, initial)>
running thread  <Thread(Thread-14318, initial)>
running thread  <Thread(Thread-14319, initial)>
running thread  <Thread(Thread-14320, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 96.46it/s]


current loss: [[3.93389363e+65]]
running thread  <Thread(Thread-14321, initial)>
running thread  <Thread(Thread-14322, initial)>
running thread  <Thread(Thread-14323, initial)>
running thread  <Thread(Thread-14324, initial)>
running thread  <Thread(Thread-14325, initial)>
running thread  <Thread(Thread-14326, initial)>
running thread  <Thread(Thread-14327, initial)>
running thread  <Thread(Thread-14328, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 102.53it/s]


current loss: [[2.65900815e+129]]
running thread  <Thread(Thread-14329, initial)>
running thread  <Thread(Thread-14330, initial)>
running thread  <Thread(Thread-14331, initial)>
running thread  <Thread(Thread-14332, initial)>
running thread  <Thread(Thread-14333, initial)>
running thread  <Thread(Thread-14334, initial)>
running thread  <Thread(Thread-14335, initial)>
running thread  <Thread(Thread-14336, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 105.57it/s]


current loss: [[7.49858927e+142]]
running thread  <Thread(Thread-14337, initial)>
running thread  <Thread(Thread-14338, initial)>
running thread  <Thread(Thread-14339, initial)>
running thread  <Thread(Thread-14340, initial)>
running thread  <Thread(Thread-14341, initial)>
running thread  <Thread(Thread-14342, initial)>
running thread  <Thread(Thread-14343, initial)>
running thread  <Thread(Thread-14344, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 104.62it/s]


current loss: [[6.14895422e+97]]
running thread  <Thread(Thread-14345, initial)>
running thread  <Thread(Thread-14346, initial)>
running thread  <Thread(Thread-14347, initial)>
running thread  <Thread(Thread-14348, initial)>
running thread  <Thread(Thread-14349, initial)>
running thread  <Thread(Thread-14350, initial)>
running thread  <Thread(Thread-14351, initial)>
running thread  <Thread(Thread-14352, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 130.13it/s]


current loss: [[2.1538707e+267]]
running thread  <Thread(Thread-14353, initial)>
running thread  <Thread(Thread-14354, initial)>
running thread  <Thread(Thread-14355, initial)>
running thread  <Thread(Thread-14356, initial)>
running thread  <Thread(Thread-14357, initial)>
running thread  <Thread(Thread-14358, initial)>
running thread  <Thread(Thread-14359, initial)>
running thread  <Thread(Thread-14360, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 109.88it/s]


current loss: [[nan]]
running thread  <Thread(Thread-14361, initial)>
running thread  <Thread(Thread-14362, initial)>
running thread  <Thread(Thread-14363, initial)>
running thread  <Thread(Thread-14364, initial)>
running thread  <Thread(Thread-14365, initial)>
running thread  <Thread(Thread-14366, initial)>
running thread  <Thread(Thread-14367, initial)>
running thread  <Thread(Thread-14368, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 99.72it/s]


current loss: [[nan]]
running thread  <Thread(Thread-14369, initial)>
running thread  <Thread(Thread-14370, initial)>
running thread  <Thread(Thread-14371, initial)>
running thread  <Thread(Thread-14372, initial)>
running thread  <Thread(Thread-14373, initial)>
running thread  <Thread(Thread-14374, initial)>
running thread  <Thread(Thread-14375, initial)>
running thread  <Thread(Thread-14376, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 122.37it/s]


current loss: [[inf]]
running thread  <Thread(Thread-14377, initial)>
running thread  <Thread(Thread-14378, initial)>
running thread  <Thread(Thread-14379, initial)>
running thread  <Thread(Thread-14380, initial)>
running thread  <Thread(Thread-14381, initial)>
running thread  <Thread(Thread-14382, initial)>
running thread  <Thread(Thread-14383, initial)>
running thread  <Thread(Thread-14384, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 97.87it/s]


current loss: [[4.53333771e+143]]
running thread  <Thread(Thread-14385, initial)>
running thread  <Thread(Thread-14386, initial)>
running thread  <Thread(Thread-14387, initial)>
running thread  <Thread(Thread-14388, initial)>
running thread  <Thread(Thread-14389, initial)>
running thread  <Thread(Thread-14390, initial)>
running thread  <Thread(Thread-14391, initial)>
running thread  <Thread(Thread-14392, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 107.28it/s]


current loss: [[2.66018258e+166]]
running thread  <Thread(Thread-14393, initial)>
running thread  <Thread(Thread-14394, initial)>
running thread  <Thread(Thread-14395, initial)>
running thread  <Thread(Thread-14396, initial)>
running thread  <Thread(Thread-14397, initial)>
running thread  <Thread(Thread-14398, initial)>
running thread  <Thread(Thread-14399, initial)>
running thread  <Thread(Thread-14400, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 97.67it/s]


current loss: [[2.67978288e+230]]
running thread  <Thread(Thread-14401, initial)>
running thread  <Thread(Thread-14402, initial)>
running thread  <Thread(Thread-14403, initial)>
running thread  <Thread(Thread-14404, initial)>
running thread  <Thread(Thread-14405, initial)>
running thread  <Thread(Thread-14406, initial)>
running thread  <Thread(Thread-14407, initial)>
running thread  <Thread(Thread-14408, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 109.01it/s]


current loss: [[1.10603057e+196]]
running thread  <Thread(Thread-14409, initial)>
running thread  <Thread(Thread-14410, initial)>
running thread  <Thread(Thread-14411, initial)>
running thread  <Thread(Thread-14412, initial)>
running thread  <Thread(Thread-14413, initial)>
running thread  <Thread(Thread-14414, initial)>
running thread  <Thread(Thread-14415, initial)>
running thread  <Thread(Thread-14416, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 105.67it/s]


current loss: [[2.0852998e+223]]
running thread  <Thread(Thread-14417, initial)>
running thread  <Thread(Thread-14418, initial)>
running thread  <Thread(Thread-14419, initial)>
running thread  <Thread(Thread-14420, initial)>
running thread  <Thread(Thread-14421, initial)>
running thread  <Thread(Thread-14422, initial)>
running thread  <Thread(Thread-14423, initial)>
running thread  <Thread(Thread-14424, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 97.98it/s] 


current loss: [[7.31740925e+270]]
running thread  <Thread(Thread-14425, initial)>
running thread  <Thread(Thread-14426, initial)>
running thread  <Thread(Thread-14427, initial)>
running thread  <Thread(Thread-14428, initial)>
running thread  <Thread(Thread-14429, initial)>
running thread  <Thread(Thread-14430, initial)>
running thread  <Thread(Thread-14431, initial)>
running thread  <Thread(Thread-14432, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 99.31it/s]


current loss: [[inf]]
running thread  <Thread(Thread-14433, initial)>
running thread  <Thread(Thread-14434, initial)>
running thread  <Thread(Thread-14435, initial)>
running thread  <Thread(Thread-14436, initial)>
running thread  <Thread(Thread-14437, initial)>
running thread  <Thread(Thread-14438, initial)>
running thread  <Thread(Thread-14439, initial)>
running thread  <Thread(Thread-14440, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 104.72it/s]


current loss: [[nan]]
running thread  <Thread(Thread-14441, initial)>
running thread  <Thread(Thread-14442, initial)>
running thread  <Thread(Thread-14443, initial)>
running thread  <Thread(Thread-14444, initial)>
running thread  <Thread(Thread-14445, initial)>
running thread  <Thread(Thread-14446, initial)>
running thread  <Thread(Thread-14447, initial)>
running thread  <Thread(Thread-14448, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 111.57it/s]


current loss: [[3.37710606e+139]]
running thread  <Thread(Thread-14449, initial)>
running thread  <Thread(Thread-14450, initial)>
running thread  <Thread(Thread-14451, initial)>
running thread  <Thread(Thread-14452, initial)>
running thread  <Thread(Thread-14453, initial)>
running thread  <Thread(Thread-14454, initial)>
running thread  <Thread(Thread-14455, initial)>
running thread  <Thread(Thread-14456, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 99.31it/s] 


current loss: [[6.36444452e+63]]
running thread  <Thread(Thread-14457, initial)>
running thread  <Thread(Thread-14458, initial)>
running thread  <Thread(Thread-14459, initial)>
running thread  <Thread(Thread-14460, initial)>
running thread  <Thread(Thread-14461, initial)>
running thread  <Thread(Thread-14462, initial)>
running thread  <Thread(Thread-14463, initial)>
running thread  <Thread(Thread-14464, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 119.94it/s]


current loss: [[8.0628779e+274]]
running thread  <Thread(Thread-14465, initial)>
running thread  <Thread(Thread-14466, initial)>
running thread  <Thread(Thread-14467, initial)>
running thread  <Thread(Thread-14468, initial)>
running thread  <Thread(Thread-14469, initial)>
running thread  <Thread(Thread-14470, initial)>
running thread  <Thread(Thread-14471, initial)>
running thread  <Thread(Thread-14472, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 96.17it/s]


current loss: [[nan]]
running thread  <Thread(Thread-14473, initial)>
running thread  <Thread(Thread-14474, initial)>
running thread  <Thread(Thread-14475, initial)>
running thread  <Thread(Thread-14476, initial)>
running thread  <Thread(Thread-14477, initial)>
running thread  <Thread(Thread-14478, initial)>
running thread  <Thread(Thread-14479, initial)>
running thread  <Thread(Thread-14480, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 103.85it/s]


current loss: [[6.94556331e+77]]
running thread  <Thread(Thread-14481, initial)>
running thread  <Thread(Thread-14482, initial)>
running thread  <Thread(Thread-14483, initial)>
running thread  <Thread(Thread-14484, initial)>
running thread  <Thread(Thread-14485, initial)>
running thread  <Thread(Thread-14486, initial)>
running thread  <Thread(Thread-14487, initial)>
running thread  <Thread(Thread-14488, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 112.13it/s]


current loss: [[nan]]
running thread  <Thread(Thread-14489, initial)>
running thread  <Thread(Thread-14490, initial)>
running thread  <Thread(Thread-14491, initial)>
running thread  <Thread(Thread-14492, initial)>
running thread  <Thread(Thread-14493, initial)>
running thread  <Thread(Thread-14494, initial)>
running thread  <Thread(Thread-14495, initial)>
running thread  <Thread(Thread-14496, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 106.27it/s]


current loss: [[5.07198865e+225]]
running thread  <Thread(Thread-14497, initial)>
running thread  <Thread(Thread-14498, initial)>
running thread  <Thread(Thread-14499, initial)>
running thread  <Thread(Thread-14500, initial)>
running thread  <Thread(Thread-14501, initial)>
running thread  <Thread(Thread-14502, initial)>
running thread  <Thread(Thread-14503, initial)>
running thread  <Thread(Thread-14504, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 110.73it/s]


current loss: [[1.23233256e+100]]
running thread  <Thread(Thread-14505, initial)>
running thread  <Thread(Thread-14506, initial)>
running thread  <Thread(Thread-14507, initial)>
running thread  <Thread(Thread-14508, initial)>
running thread  <Thread(Thread-14509, initial)>
running thread  <Thread(Thread-14510, initial)>
running thread  <Thread(Thread-14511, initial)>
running thread  <Thread(Thread-14512, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 106.56it/s]


current loss: [[47.82476361]]
running thread  <Thread(Thread-14513, initial)>
running thread  <Thread(Thread-14514, initial)>
running thread  <Thread(Thread-14515, initial)>
running thread  <Thread(Thread-14516, initial)>
running thread  <Thread(Thread-14517, initial)>
running thread  <Thread(Thread-14518, initial)>
running thread  <Thread(Thread-14519, initial)>
running thread  <Thread(Thread-14520, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 118.37it/s]


current loss: [[inf]]
running thread  <Thread(Thread-14521, initial)>
running thread  <Thread(Thread-14522, initial)>
running thread  <Thread(Thread-14523, initial)>
running thread  <Thread(Thread-14524, initial)>
running thread  <Thread(Thread-14525, initial)>
running thread  <Thread(Thread-14526, initial)>
running thread  <Thread(Thread-14527, initial)>
running thread  <Thread(Thread-14528, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 103.24it/s]


current loss: [[1.17611925e+159]]
running thread  <Thread(Thread-14529, initial)>
running thread  <Thread(Thread-14530, initial)>
running thread  <Thread(Thread-14531, initial)>
running thread  <Thread(Thread-14532, initial)>
running thread  <Thread(Thread-14533, initial)>
running thread  <Thread(Thread-14534, initial)>
running thread  <Thread(Thread-14535, initial)>
running thread  <Thread(Thread-14536, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 97.80it/s]


current loss: [[1.86526471e+137]]
running thread  <Thread(Thread-14537, initial)>
running thread  <Thread(Thread-14538, initial)>
running thread  <Thread(Thread-14539, initial)>
running thread  <Thread(Thread-14540, initial)>
running thread  <Thread(Thread-14541, initial)>
running thread  <Thread(Thread-14542, initial)>
running thread  <Thread(Thread-14543, initial)>
running thread  <Thread(Thread-14544, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 110.61it/s]


current loss: [[6.96174585e+208]]
running thread  <Thread(Thread-14545, initial)>
running thread  <Thread(Thread-14546, initial)>
running thread  <Thread(Thread-14547, initial)>
running thread  <Thread(Thread-14548, initial)>
running thread  <Thread(Thread-14549, initial)>
running thread  <Thread(Thread-14550, initial)>
running thread  <Thread(Thread-14551, initial)>
running thread  <Thread(Thread-14552, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 102.76it/s]


current loss: [[7.70083292e+226]]
running thread  <Thread(Thread-14553, initial)>
running thread  <Thread(Thread-14554, initial)>
running thread  <Thread(Thread-14555, initial)>
running thread  <Thread(Thread-14556, initial)>
running thread  <Thread(Thread-14557, initial)>
running thread  <Thread(Thread-14558, initial)>
running thread  <Thread(Thread-14559, initial)>
running thread  <Thread(Thread-14560, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 110.31it/s]


current loss: [[5.04747866e+290]]
running thread  <Thread(Thread-14561, initial)>
running thread  <Thread(Thread-14562, initial)>
running thread  <Thread(Thread-14563, initial)>
running thread  <Thread(Thread-14564, initial)>
running thread  <Thread(Thread-14565, initial)>
running thread  <Thread(Thread-14566, initial)>
running thread  <Thread(Thread-14567, initial)>
running thread  <Thread(Thread-14568, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 101.55it/s]


current loss: [[4.83978784e+153]]
running thread  <Thread(Thread-14569, initial)>
running thread  <Thread(Thread-14570, initial)>
running thread  <Thread(Thread-14571, initial)>
running thread  <Thread(Thread-14572, initial)>
running thread  <Thread(Thread-14573, initial)>
running thread  <Thread(Thread-14574, initial)>
running thread  <Thread(Thread-14575, initial)>
running thread  <Thread(Thread-14576, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 105.45it/s]


current loss: [[nan]]
running thread  <Thread(Thread-14577, initial)>
running thread  <Thread(Thread-14578, initial)>
running thread  <Thread(Thread-14579, initial)>
running thread  <Thread(Thread-14580, initial)>
running thread  <Thread(Thread-14581, initial)>
running thread  <Thread(Thread-14582, initial)>
running thread  <Thread(Thread-14583, initial)>
running thread  <Thread(Thread-14584, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 96.92it/s]


current loss: [[nan]]
running thread  <Thread(Thread-14585, initial)>
running thread  <Thread(Thread-14586, initial)>
running thread  <Thread(Thread-14587, initial)>
running thread  <Thread(Thread-14588, initial)>
running thread  <Thread(Thread-14589, initial)>
running thread  <Thread(Thread-14590, initial)>
running thread  <Thread(Thread-14591, initial)>
running thread  <Thread(Thread-14592, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 110.20it/s]


current loss: [[2.73890389e+239]]
running thread  <Thread(Thread-14593, initial)>
running thread  <Thread(Thread-14594, initial)>
running thread  <Thread(Thread-14595, initial)>
running thread  <Thread(Thread-14596, initial)>
running thread  <Thread(Thread-14597, initial)>
running thread  <Thread(Thread-14598, initial)>
running thread  <Thread(Thread-14599, initial)>
running thread  <Thread(Thread-14600, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 99.21it/s]


current loss: [[inf]]
running thread  <Thread(Thread-14601, initial)>
running thread  <Thread(Thread-14602, initial)>
running thread  <Thread(Thread-14603, initial)>
running thread  <Thread(Thread-14604, initial)>
running thread  <Thread(Thread-14605, initial)>
running thread  <Thread(Thread-14606, initial)>
running thread  <Thread(Thread-14607, initial)>
running thread  <Thread(Thread-14608, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 106.77it/s]


current loss: [[nan]]
running thread  <Thread(Thread-14609, initial)>
running thread  <Thread(Thread-14610, initial)>
running thread  <Thread(Thread-14611, initial)>
running thread  <Thread(Thread-14612, initial)>
running thread  <Thread(Thread-14613, initial)>
running thread  <Thread(Thread-14614, initial)>
running thread  <Thread(Thread-14615, initial)>
running thread  <Thread(Thread-14616, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 101.91it/s]


current loss: [[inf]]
running thread  <Thread(Thread-14617, initial)>
running thread  <Thread(Thread-14618, initial)>
running thread  <Thread(Thread-14619, initial)>
running thread  <Thread(Thread-14620, initial)>
running thread  <Thread(Thread-14621, initial)>
running thread  <Thread(Thread-14622, initial)>
running thread  <Thread(Thread-14623, initial)>
running thread  <Thread(Thread-14624, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 100.65it/s]


current loss: [[nan]]
running thread  <Thread(Thread-14625, initial)>
running thread  <Thread(Thread-14626, initial)>
running thread  <Thread(Thread-14627, initial)>
running thread  <Thread(Thread-14628, initial)>
running thread  <Thread(Thread-14629, initial)>
running thread  <Thread(Thread-14630, initial)>
running thread  <Thread(Thread-14631, initial)>
running thread  <Thread(Thread-14632, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 105.50it/s]


current loss: [[inf]]
running thread  <Thread(Thread-14633, initial)>
running thread  <Thread(Thread-14634, initial)>
running thread  <Thread(Thread-14635, initial)>
running thread  <Thread(Thread-14636, initial)>
running thread  <Thread(Thread-14637, initial)>
running thread  <Thread(Thread-14638, initial)>
running thread  <Thread(Thread-14639, initial)>
running thread  <Thread(Thread-14640, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 101.46it/s]


current loss: [[5.22065304e+27]]
running thread  <Thread(Thread-14641, initial)>
running thread  <Thread(Thread-14642, initial)>
running thread  <Thread(Thread-14643, initial)>
running thread  <Thread(Thread-14644, initial)>
running thread  <Thread(Thread-14645, initial)>
running thread  <Thread(Thread-14646, initial)>
running thread  <Thread(Thread-14647, initial)>
running thread  <Thread(Thread-14648, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 118.42it/s]


current loss: [[5.97108281e+263]]
running thread  <Thread(Thread-14649, initial)>
running thread  <Thread(Thread-14650, initial)>
running thread  <Thread(Thread-14651, initial)>
running thread  <Thread(Thread-14652, initial)>
running thread  <Thread(Thread-14653, initial)>
running thread  <Thread(Thread-14654, initial)>
running thread  <Thread(Thread-14655, initial)>
running thread  <Thread(Thread-14656, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 92.74it/s]


current loss: [[inf]]
running thread  <Thread(Thread-14657, initial)>
running thread  <Thread(Thread-14658, initial)>
running thread  <Thread(Thread-14659, initial)>
running thread  <Thread(Thread-14660, initial)>
running thread  <Thread(Thread-14661, initial)>
running thread  <Thread(Thread-14662, initial)>
running thread  <Thread(Thread-14663, initial)>
running thread  <Thread(Thread-14664, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 119.78it/s]


current loss: [[nan]]
running thread  <Thread(Thread-14665, initial)>
running thread  <Thread(Thread-14666, initial)>
running thread  <Thread(Thread-14667, initial)>
running thread  <Thread(Thread-14668, initial)>
running thread  <Thread(Thread-14669, initial)>
running thread  <Thread(Thread-14670, initial)>
running thread  <Thread(Thread-14671, initial)>
running thread  <Thread(Thread-14672, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 101.58it/s]


current loss: [[inf]]
running thread  <Thread(Thread-14673, initial)>
running thread  <Thread(Thread-14674, initial)>
running thread  <Thread(Thread-14675, initial)>
running thread  <Thread(Thread-14676, initial)>
running thread  <Thread(Thread-14677, initial)>
running thread  <Thread(Thread-14678, initial)>
running thread  <Thread(Thread-14679, initial)>
running thread  <Thread(Thread-14680, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 102.92it/s]


current loss: [[inf]]
running thread  <Thread(Thread-14681, initial)>
running thread  <Thread(Thread-14682, initial)>
running thread  <Thread(Thread-14683, initial)>
running thread  <Thread(Thread-14684, initial)>
running thread  <Thread(Thread-14685, initial)>
running thread  <Thread(Thread-14686, initial)>
running thread  <Thread(Thread-14687, initial)>
running thread  <Thread(Thread-14688, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 115.14it/s]


current loss: [[inf]]
running thread  <Thread(Thread-14689, initial)>
running thread  <Thread(Thread-14690, initial)>
running thread  <Thread(Thread-14691, initial)>
running thread  <Thread(Thread-14692, initial)>
running thread  <Thread(Thread-14693, initial)>
running thread  <Thread(Thread-14694, initial)>
running thread  <Thread(Thread-14695, initial)>
running thread  <Thread(Thread-14696, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 115.46it/s]


current loss: [[inf]]
running thread  <Thread(Thread-14697, initial)>
running thread  <Thread(Thread-14698, initial)>
running thread  <Thread(Thread-14699, initial)>
running thread  <Thread(Thread-14700, initial)>
running thread  <Thread(Thread-14701, initial)>
running thread  <Thread(Thread-14702, initial)>
running thread  <Thread(Thread-14703, initial)>
running thread  <Thread(Thread-14704, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 104.24it/s]


current loss: [[5.72497838e+173]]
running thread  <Thread(Thread-14705, initial)>
running thread  <Thread(Thread-14706, initial)>
running thread  <Thread(Thread-14707, initial)>
running thread  <Thread(Thread-14708, initial)>
running thread  <Thread(Thread-14709, initial)>
running thread  <Thread(Thread-14710, initial)>
running thread  <Thread(Thread-14711, initial)>
running thread  <Thread(Thread-14712, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 95.84it/s]


current loss: [[inf]]
running thread  <Thread(Thread-14713, initial)>
running thread  <Thread(Thread-14714, initial)>
running thread  <Thread(Thread-14715, initial)>
running thread  <Thread(Thread-14716, initial)>
running thread  <Thread(Thread-14717, initial)>
running thread  <Thread(Thread-14718, initial)>
running thread  <Thread(Thread-14719, initial)>
running thread  <Thread(Thread-14720, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 107.23it/s]


current loss: [[4.61482001e+180]]
running thread  <Thread(Thread-14721, initial)>
running thread  <Thread(Thread-14722, initial)>
running thread  <Thread(Thread-14723, initial)>
running thread  <Thread(Thread-14724, initial)>
running thread  <Thread(Thread-14725, initial)>
running thread  <Thread(Thread-14726, initial)>
running thread  <Thread(Thread-14727, initial)>
running thread  <Thread(Thread-14728, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 99.44it/s] 


current loss: [[1.47859107e+113]]
running thread  <Thread(Thread-14729, initial)>
running thread  <Thread(Thread-14730, initial)>
running thread  <Thread(Thread-14731, initial)>
running thread  <Thread(Thread-14732, initial)>
running thread  <Thread(Thread-14733, initial)>
running thread  <Thread(Thread-14734, initial)>
running thread  <Thread(Thread-14735, initial)>
running thread  <Thread(Thread-14736, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 92.14it/s]


current loss: [[nan]]
running thread  <Thread(Thread-14737, initial)>
running thread  <Thread(Thread-14738, initial)>
running thread  <Thread(Thread-14739, initial)>
running thread  <Thread(Thread-14740, initial)>
running thread  <Thread(Thread-14741, initial)>
running thread  <Thread(Thread-14742, initial)>
running thread  <Thread(Thread-14743, initial)>
running thread  <Thread(Thread-14744, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 103.96it/s]


current loss: [[inf]]
running thread  <Thread(Thread-14745, initial)>
running thread  <Thread(Thread-14746, initial)>
running thread  <Thread(Thread-14747, initial)>
running thread  <Thread(Thread-14748, initial)>
running thread  <Thread(Thread-14749, initial)>
running thread  <Thread(Thread-14750, initial)>
running thread  <Thread(Thread-14751, initial)>
running thread  <Thread(Thread-14752, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 103.20it/s]


current loss: [[nan]]
running thread  <Thread(Thread-14753, initial)>
running thread  <Thread(Thread-14754, initial)>
running thread  <Thread(Thread-14755, initial)>
running thread  <Thread(Thread-14756, initial)>
running thread  <Thread(Thread-14757, initial)>
running thread  <Thread(Thread-14758, initial)>
running thread  <Thread(Thread-14759, initial)>
running thread  <Thread(Thread-14760, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 111.62it/s]


current loss: [[inf]]
running thread  <Thread(Thread-14761, initial)>
running thread  <Thread(Thread-14762, initial)>
running thread  <Thread(Thread-14763, initial)>
running thread  <Thread(Thread-14764, initial)>
running thread  <Thread(Thread-14765, initial)>
running thread  <Thread(Thread-14766, initial)>
running thread  <Thread(Thread-14767, initial)>
running thread  <Thread(Thread-14768, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 95.52it/s]


current loss: [[inf]]
running thread  <Thread(Thread-14769, initial)>
running thread  <Thread(Thread-14770, initial)>
running thread  <Thread(Thread-14771, initial)>
running thread  <Thread(Thread-14772, initial)>
running thread  <Thread(Thread-14773, initial)>
running thread  <Thread(Thread-14774, initial)>
running thread  <Thread(Thread-14775, initial)>
running thread  <Thread(Thread-14776, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 101.01it/s]


current loss: [[3.63262379e+262]]
running thread  <Thread(Thread-14777, initial)>
running thread  <Thread(Thread-14778, initial)>
running thread  <Thread(Thread-14779, initial)>
running thread  <Thread(Thread-14780, initial)>
running thread  <Thread(Thread-14781, initial)>
running thread  <Thread(Thread-14782, initial)>
running thread  <Thread(Thread-14783, initial)>
running thread  <Thread(Thread-14784, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 112.83it/s]


current loss: [[1.04997171e+188]]
running thread  <Thread(Thread-14785, initial)>
running thread  <Thread(Thread-14786, initial)>
running thread  <Thread(Thread-14787, initial)>
running thread  <Thread(Thread-14788, initial)>
running thread  <Thread(Thread-14789, initial)>
running thread  <Thread(Thread-14790, initial)>
running thread  <Thread(Thread-14791, initial)>
running thread  <Thread(Thread-14792, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 101.33it/s]


current loss: [[inf]]
running thread  <Thread(Thread-14793, initial)>
running thread  <Thread(Thread-14794, initial)>
running thread  <Thread(Thread-14795, initial)>
running thread  <Thread(Thread-14796, initial)>
running thread  <Thread(Thread-14797, initial)>
running thread  <Thread(Thread-14798, initial)>
running thread  <Thread(Thread-14799, initial)>
running thread  <Thread(Thread-14800, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 100.84it/s]


current loss: [[inf]]
running thread  <Thread(Thread-14801, initial)>
running thread  <Thread(Thread-14802, initial)>
running thread  <Thread(Thread-14803, initial)>
running thread  <Thread(Thread-14804, initial)>
running thread  <Thread(Thread-14805, initial)>
running thread  <Thread(Thread-14806, initial)>
running thread  <Thread(Thread-14807, initial)>
running thread  <Thread(Thread-14808, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 105.61it/s]


current loss: [[2.45420566e+96]]
running thread  <Thread(Thread-14809, initial)>
running thread  <Thread(Thread-14810, initial)>
running thread  <Thread(Thread-14811, initial)>
running thread  <Thread(Thread-14812, initial)>
running thread  <Thread(Thread-14813, initial)>
running thread  <Thread(Thread-14814, initial)>
running thread  <Thread(Thread-14815, initial)>
running thread  <Thread(Thread-14816, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 105.09it/s]


current loss: [[1.01988265e+109]]
running thread  <Thread(Thread-14817, initial)>
running thread  <Thread(Thread-14818, initial)>
running thread  <Thread(Thread-14819, initial)>
running thread  <Thread(Thread-14820, initial)>
running thread  <Thread(Thread-14821, initial)>
running thread  <Thread(Thread-14822, initial)>
running thread  <Thread(Thread-14823, initial)>
running thread  <Thread(Thread-14824, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 113.18it/s]


current loss: [[nan]]
running thread  <Thread(Thread-14825, initial)>
running thread  <Thread(Thread-14826, initial)>
running thread  <Thread(Thread-14827, initial)>
running thread  <Thread(Thread-14828, initial)>
running thread  <Thread(Thread-14829, initial)>
running thread  <Thread(Thread-14830, initial)>
running thread  <Thread(Thread-14831, initial)>
running thread  <Thread(Thread-14832, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 124.30it/s]


current loss: [[nan]]
running thread  <Thread(Thread-14833, initial)>
running thread  <Thread(Thread-14834, initial)>
running thread  <Thread(Thread-14835, initial)>
running thread  <Thread(Thread-14836, initial)>
running thread  <Thread(Thread-14837, initial)>
running thread  <Thread(Thread-14838, initial)>
running thread  <Thread(Thread-14839, initial)>
running thread  <Thread(Thread-14840, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 107.99it/s]


current loss: [[nan]]
running thread  <Thread(Thread-14841, initial)>
running thread  <Thread(Thread-14842, initial)>
running thread  <Thread(Thread-14843, initial)>
running thread  <Thread(Thread-14844, initial)>
running thread  <Thread(Thread-14845, initial)>
running thread  <Thread(Thread-14846, initial)>
running thread  <Thread(Thread-14847, initial)>
running thread  <Thread(Thread-14848, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 114.97it/s]


current loss: [[3.31498727e+22]]
running thread  <Thread(Thread-14849, initial)>
running thread  <Thread(Thread-14850, initial)>
running thread  <Thread(Thread-14851, initial)>
running thread  <Thread(Thread-14852, initial)>
running thread  <Thread(Thread-14853, initial)>
running thread  <Thread(Thread-14854, initial)>
running thread  <Thread(Thread-14855, initial)>
running thread  <Thread(Thread-14856, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 106.02it/s]


current loss: [[42.01866796]]
running thread  <Thread(Thread-14857, initial)>
running thread  <Thread(Thread-14858, initial)>
running thread  <Thread(Thread-14859, initial)>
running thread  <Thread(Thread-14860, initial)>
running thread  <Thread(Thread-14861, initial)>
running thread  <Thread(Thread-14862, initial)>
running thread  <Thread(Thread-14863, initial)>
running thread  <Thread(Thread-14864, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 114.92it/s]


current loss: [[5.99831275e+140]]
running thread  <Thread(Thread-14865, initial)>
running thread  <Thread(Thread-14866, initial)>
running thread  <Thread(Thread-14867, initial)>
running thread  <Thread(Thread-14868, initial)>
running thread  <Thread(Thread-14869, initial)>
running thread  <Thread(Thread-14870, initial)>
running thread  <Thread(Thread-14871, initial)>
running thread  <Thread(Thread-14872, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 118.86it/s]


current loss: [[7.82473427e+280]]
running thread  <Thread(Thread-14873, initial)>
running thread  <Thread(Thread-14874, initial)>
running thread  <Thread(Thread-14875, initial)>
running thread  <Thread(Thread-14876, initial)>
running thread  <Thread(Thread-14877, initial)>
running thread  <Thread(Thread-14878, initial)>
running thread  <Thread(Thread-14879, initial)>
running thread  <Thread(Thread-14880, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 110.53it/s]


current loss: [[nan]]
running thread  <Thread(Thread-14881, initial)>
running thread  <Thread(Thread-14882, initial)>
running thread  <Thread(Thread-14883, initial)>
running thread  <Thread(Thread-14884, initial)>
running thread  <Thread(Thread-14885, initial)>
running thread  <Thread(Thread-14886, initial)>
running thread  <Thread(Thread-14887, initial)>
running thread  <Thread(Thread-14888, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 106.15it/s]


current loss: [[1.33021803e+195]]
running thread  <Thread(Thread-14889, initial)>
running thread  <Thread(Thread-14890, initial)>
running thread  <Thread(Thread-14891, initial)>
running thread  <Thread(Thread-14892, initial)>
running thread  <Thread(Thread-14893, initial)>
running thread  <Thread(Thread-14894, initial)>
running thread  <Thread(Thread-14895, initial)>
running thread  <Thread(Thread-14896, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 97.76it/s]


current loss: [[inf]]
running thread  <Thread(Thread-14897, initial)>
running thread  <Thread(Thread-14898, initial)>
running thread  <Thread(Thread-14899, initial)>
running thread  <Thread(Thread-14900, initial)>
running thread  <Thread(Thread-14901, initial)>
running thread  <Thread(Thread-14902, initial)>
running thread  <Thread(Thread-14903, initial)>
running thread  <Thread(Thread-14904, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 95.00it/s]


current loss: [[inf]]
running thread  <Thread(Thread-14905, initial)>
running thread  <Thread(Thread-14906, initial)>
running thread  <Thread(Thread-14907, initial)>
running thread  <Thread(Thread-14908, initial)>
running thread  <Thread(Thread-14909, initial)>
running thread  <Thread(Thread-14910, initial)>
running thread  <Thread(Thread-14911, initial)>
running thread  <Thread(Thread-14912, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 100.70it/s]


current loss: [[inf]]
running thread  <Thread(Thread-14913, initial)>
running thread  <Thread(Thread-14914, initial)>
running thread  <Thread(Thread-14915, initial)>
running thread  <Thread(Thread-14916, initial)>
running thread  <Thread(Thread-14917, initial)>
running thread  <Thread(Thread-14918, initial)>
running thread  <Thread(Thread-14919, initial)>
running thread  <Thread(Thread-14920, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 116.56it/s]


current loss: [[6.60169442e+207]]
running thread  <Thread(Thread-14921, initial)>
running thread  <Thread(Thread-14922, initial)>
running thread  <Thread(Thread-14923, initial)>
running thread  <Thread(Thread-14924, initial)>
running thread  <Thread(Thread-14925, initial)>
running thread  <Thread(Thread-14926, initial)>
running thread  <Thread(Thread-14927, initial)>
running thread  <Thread(Thread-14928, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 109.17it/s]


current loss: [[2.59062654e+16]]
running thread  <Thread(Thread-14929, initial)>
running thread  <Thread(Thread-14930, initial)>
running thread  <Thread(Thread-14931, initial)>
running thread  <Thread(Thread-14932, initial)>
running thread  <Thread(Thread-14933, initial)>
running thread  <Thread(Thread-14934, initial)>
running thread  <Thread(Thread-14935, initial)>
running thread  <Thread(Thread-14936, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 103.62it/s]


current loss: [[inf]]
running thread  <Thread(Thread-14937, initial)>
running thread  <Thread(Thread-14938, initial)>
running thread  <Thread(Thread-14939, initial)>
running thread  <Thread(Thread-14940, initial)>
running thread  <Thread(Thread-14941, initial)>
running thread  <Thread(Thread-14942, initial)>
running thread  <Thread(Thread-14943, initial)>
running thread  <Thread(Thread-14944, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 107.60it/s]


current loss: [[1.31698031e+196]]
running thread  <Thread(Thread-14945, initial)>
running thread  <Thread(Thread-14946, initial)>
running thread  <Thread(Thread-14947, initial)>
running thread  <Thread(Thread-14948, initial)>
running thread  <Thread(Thread-14949, initial)>
running thread  <Thread(Thread-14950, initial)>
running thread  <Thread(Thread-14951, initial)>
running thread  <Thread(Thread-14952, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 113.59it/s]


current loss: [[1.33836817e+42]]
running thread  <Thread(Thread-14953, initial)>
running thread  <Thread(Thread-14954, initial)>
running thread  <Thread(Thread-14955, initial)>
running thread  <Thread(Thread-14956, initial)>
running thread  <Thread(Thread-14957, initial)>
running thread  <Thread(Thread-14958, initial)>
running thread  <Thread(Thread-14959, initial)>
running thread  <Thread(Thread-14960, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 97.72it/s]


current loss: [[nan]]
running thread  <Thread(Thread-14961, initial)>
running thread  <Thread(Thread-14962, initial)>
running thread  <Thread(Thread-14963, initial)>
running thread  <Thread(Thread-14964, initial)>
running thread  <Thread(Thread-14965, initial)>
running thread  <Thread(Thread-14966, initial)>
running thread  <Thread(Thread-14967, initial)>
running thread  <Thread(Thread-14968, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 112.87it/s]


current loss: [[inf]]
running thread  <Thread(Thread-14969, initial)>
running thread  <Thread(Thread-14970, initial)>
running thread  <Thread(Thread-14971, initial)>
running thread  <Thread(Thread-14972, initial)>
running thread  <Thread(Thread-14973, initial)>
running thread  <Thread(Thread-14974, initial)>
running thread  <Thread(Thread-14975, initial)>
running thread  <Thread(Thread-14976, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 101.08it/s]


current loss: [[8.5625037e+141]]
running thread  <Thread(Thread-14977, initial)>
running thread  <Thread(Thread-14978, initial)>
running thread  <Thread(Thread-14979, initial)>
running thread  <Thread(Thread-14980, initial)>
running thread  <Thread(Thread-14981, initial)>
running thread  <Thread(Thread-14982, initial)>
running thread  <Thread(Thread-14983, initial)>
running thread  <Thread(Thread-14984, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 95.33it/s]


current loss: [[nan]]
running thread  <Thread(Thread-14985, initial)>
running thread  <Thread(Thread-14986, initial)>
running thread  <Thread(Thread-14987, initial)>
running thread  <Thread(Thread-14988, initial)>
running thread  <Thread(Thread-14989, initial)>
running thread  <Thread(Thread-14990, initial)>
running thread  <Thread(Thread-14991, initial)>
running thread  <Thread(Thread-14992, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 105.16it/s]


current loss: [[nan]]
running thread  <Thread(Thread-14993, initial)>
running thread  <Thread(Thread-14994, initial)>
running thread  <Thread(Thread-14995, initial)>
running thread  <Thread(Thread-14996, initial)>
running thread  <Thread(Thread-14997, initial)>
running thread  <Thread(Thread-14998, initial)>
running thread  <Thread(Thread-14999, initial)>
running thread  <Thread(Thread-15000, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 95.84it/s]


current loss: [[1.14406252e+103]]
running thread  <Thread(Thread-15001, initial)>
running thread  <Thread(Thread-15002, initial)>
running thread  <Thread(Thread-15003, initial)>
running thread  <Thread(Thread-15004, initial)>
running thread  <Thread(Thread-15005, initial)>
running thread  <Thread(Thread-15006, initial)>
running thread  <Thread(Thread-15007, initial)>
running thread  <Thread(Thread-15008, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 116.30it/s]


current loss: [[inf]]
running thread  <Thread(Thread-15009, initial)>
running thread  <Thread(Thread-15010, initial)>
running thread  <Thread(Thread-15011, initial)>
running thread  <Thread(Thread-15012, initial)>
running thread  <Thread(Thread-15013, initial)>
running thread  <Thread(Thread-15014, initial)>
running thread  <Thread(Thread-15015, initial)>
running thread  <Thread(Thread-15016, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 115.07it/s]


current loss: [[9.91486854e+79]]
running thread  <Thread(Thread-15017, initial)>
running thread  <Thread(Thread-15018, initial)>
running thread  <Thread(Thread-15019, initial)>
running thread  <Thread(Thread-15020, initial)>
running thread  <Thread(Thread-15021, initial)>
running thread  <Thread(Thread-15022, initial)>
running thread  <Thread(Thread-15023, initial)>
running thread  <Thread(Thread-15024, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 111.13it/s]


current loss: [[nan]]
running thread  <Thread(Thread-15025, initial)>
running thread  <Thread(Thread-15026, initial)>
running thread  <Thread(Thread-15027, initial)>
running thread  <Thread(Thread-15028, initial)>
running thread  <Thread(Thread-15029, initial)>
running thread  <Thread(Thread-15030, initial)>
running thread  <Thread(Thread-15031, initial)>
running thread  <Thread(Thread-15032, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 102.29it/s]


current loss: [[4.81459986e+122]]
running thread  <Thread(Thread-15033, initial)>
running thread  <Thread(Thread-15034, initial)>
running thread  <Thread(Thread-15035, initial)>
running thread  <Thread(Thread-15036, initial)>
running thread  <Thread(Thread-15037, initial)>
running thread  <Thread(Thread-15038, initial)>
running thread  <Thread(Thread-15039, initial)>
running thread  <Thread(Thread-15040, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 123.83it/s]


current loss: [[nan]]
running thread  <Thread(Thread-15041, initial)>
running thread  <Thread(Thread-15042, initial)>
running thread  <Thread(Thread-15043, initial)>
running thread  <Thread(Thread-15044, initial)>
running thread  <Thread(Thread-15045, initial)>
running thread  <Thread(Thread-15046, initial)>
running thread  <Thread(Thread-15047, initial)>
running thread  <Thread(Thread-15048, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 126.80it/s]


current loss: [[2.55898304e+173]]
running thread  <Thread(Thread-15049, initial)>
running thread  <Thread(Thread-15050, initial)>
running thread  <Thread(Thread-15051, initial)>
running thread  <Thread(Thread-15052, initial)>
running thread  <Thread(Thread-15053, initial)>
running thread  <Thread(Thread-15054, initial)>
running thread  <Thread(Thread-15055, initial)>
running thread  <Thread(Thread-15056, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 101.50it/s]


current loss: [[nan]]
running thread  <Thread(Thread-15057, initial)>
running thread  <Thread(Thread-15058, initial)>
running thread  <Thread(Thread-15059, initial)>
running thread  <Thread(Thread-15060, initial)>
running thread  <Thread(Thread-15061, initial)>
running thread  <Thread(Thread-15062, initial)>
running thread  <Thread(Thread-15063, initial)>
running thread  <Thread(Thread-15064, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 108.21it/s]


current loss: [[inf]]
running thread  <Thread(Thread-15065, initial)>
running thread  <Thread(Thread-15066, initial)>
running thread  <Thread(Thread-15067, initial)>
running thread  <Thread(Thread-15068, initial)>
running thread  <Thread(Thread-15069, initial)>
running thread  <Thread(Thread-15070, initial)>
running thread  <Thread(Thread-15071, initial)>
running thread  <Thread(Thread-15072, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 112.20it/s]


current loss: [[inf]]
running thread  <Thread(Thread-15073, initial)>
running thread  <Thread(Thread-15074, initial)>
running thread  <Thread(Thread-15075, initial)>
running thread  <Thread(Thread-15076, initial)>
running thread  <Thread(Thread-15077, initial)>
running thread  <Thread(Thread-15078, initial)>
running thread  <Thread(Thread-15079, initial)>
running thread  <Thread(Thread-15080, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 113.90it/s]


current loss: [[inf]]
running thread  <Thread(Thread-15081, initial)>
running thread  <Thread(Thread-15082, initial)>
running thread  <Thread(Thread-15083, initial)>
running thread  <Thread(Thread-15084, initial)>
running thread  <Thread(Thread-15085, initial)>
running thread  <Thread(Thread-15086, initial)>
running thread  <Thread(Thread-15087, initial)>
running thread  <Thread(Thread-15088, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 93.88it/s]


current loss: [[inf]]
running thread  <Thread(Thread-15089, initial)>
running thread  <Thread(Thread-15090, initial)>
running thread  <Thread(Thread-15091, initial)>
running thread  <Thread(Thread-15092, initial)>
running thread  <Thread(Thread-15093, initial)>
running thread  <Thread(Thread-15094, initial)>
running thread  <Thread(Thread-15095, initial)>
running thread  <Thread(Thread-15096, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 113.11it/s]


current loss: [[nan]]
running thread  <Thread(Thread-15097, initial)>
running thread  <Thread(Thread-15098, initial)>
running thread  <Thread(Thread-15099, initial)>
running thread  <Thread(Thread-15100, initial)>
running thread  <Thread(Thread-15101, initial)>
running thread  <Thread(Thread-15102, initial)>
running thread  <Thread(Thread-15103, initial)>
running thread  <Thread(Thread-15104, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 111.20it/s]


current loss: [[nan]]
running thread  <Thread(Thread-15105, initial)>
running thread  <Thread(Thread-15106, initial)>
running thread  <Thread(Thread-15107, initial)>
running thread  <Thread(Thread-15108, initial)>
running thread  <Thread(Thread-15109, initial)>
running thread  <Thread(Thread-15110, initial)>
running thread  <Thread(Thread-15111, initial)>
running thread  <Thread(Thread-15112, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 108.86it/s]


current loss: [[nan]]
running thread  <Thread(Thread-15113, initial)>
running thread  <Thread(Thread-15114, initial)>
running thread  <Thread(Thread-15115, initial)>
running thread  <Thread(Thread-15116, initial)>
running thread  <Thread(Thread-15117, initial)>
running thread  <Thread(Thread-15118, initial)>
running thread  <Thread(Thread-15119, initial)>
running thread  <Thread(Thread-15120, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 116.28it/s]


current loss: [[nan]]
running thread  <Thread(Thread-15121, initial)>
running thread  <Thread(Thread-15122, initial)>
running thread  <Thread(Thread-15123, initial)>
running thread  <Thread(Thread-15124, initial)>
running thread  <Thread(Thread-15125, initial)>
running thread  <Thread(Thread-15126, initial)>
running thread  <Thread(Thread-15127, initial)>
running thread  <Thread(Thread-15128, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 97.55it/s]


current loss: [[8.73690767e+298]]
running thread  <Thread(Thread-15129, initial)>
running thread  <Thread(Thread-15130, initial)>
running thread  <Thread(Thread-15131, initial)>
running thread  <Thread(Thread-15132, initial)>
running thread  <Thread(Thread-15133, initial)>
running thread  <Thread(Thread-15134, initial)>
running thread  <Thread(Thread-15135, initial)>
running thread  <Thread(Thread-15136, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 112.67it/s]


current loss: [[7.2955241e+224]]
running thread  <Thread(Thread-15137, initial)>
running thread  <Thread(Thread-15138, initial)>
running thread  <Thread(Thread-15139, initial)>
running thread  <Thread(Thread-15140, initial)>
running thread  <Thread(Thread-15141, initial)>
running thread  <Thread(Thread-15142, initial)>
running thread  <Thread(Thread-15143, initial)>
running thread  <Thread(Thread-15144, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 110.12it/s]


current loss: [[inf]]
running thread  <Thread(Thread-15145, initial)>
running thread  <Thread(Thread-15146, initial)>
running thread  <Thread(Thread-15147, initial)>
running thread  <Thread(Thread-15148, initial)>
running thread  <Thread(Thread-15149, initial)>
running thread  <Thread(Thread-15150, initial)>
running thread  <Thread(Thread-15151, initial)>
running thread  <Thread(Thread-15152, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 100.44it/s]


current loss: [[nan]]
running thread  <Thread(Thread-15153, initial)>
running thread  <Thread(Thread-15154, initial)>
running thread  <Thread(Thread-15155, initial)>
running thread  <Thread(Thread-15156, initial)>
running thread  <Thread(Thread-15157, initial)>
running thread  <Thread(Thread-15158, initial)>
running thread  <Thread(Thread-15159, initial)>
running thread  <Thread(Thread-15160, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 101.09it/s]


current loss: [[4697.29239219]]
running thread  <Thread(Thread-15161, initial)>
running thread  <Thread(Thread-15162, initial)>
running thread  <Thread(Thread-15163, initial)>
running thread  <Thread(Thread-15164, initial)>
running thread  <Thread(Thread-15165, initial)>
running thread  <Thread(Thread-15166, initial)>
running thread  <Thread(Thread-15167, initial)>
running thread  <Thread(Thread-15168, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 104.60it/s]


current loss: [[1.94109318e+32]]
running thread  <Thread(Thread-15169, initial)>
running thread  <Thread(Thread-15170, initial)>
running thread  <Thread(Thread-15171, initial)>
running thread  <Thread(Thread-15172, initial)>
running thread  <Thread(Thread-15173, initial)>
running thread  <Thread(Thread-15174, initial)>
running thread  <Thread(Thread-15175, initial)>
running thread  <Thread(Thread-15176, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 104.36it/s]


current loss: [[2.95189352e+136]]
running thread  <Thread(Thread-15177, initial)>
running thread  <Thread(Thread-15178, initial)>
running thread  <Thread(Thread-15179, initial)>
running thread  <Thread(Thread-15180, initial)>
running thread  <Thread(Thread-15181, initial)>
running thread  <Thread(Thread-15182, initial)>
running thread  <Thread(Thread-15183, initial)>
running thread  <Thread(Thread-15184, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 109.06it/s]


current loss: [[nan]]
running thread  <Thread(Thread-15185, initial)>
running thread  <Thread(Thread-15186, initial)>
running thread  <Thread(Thread-15187, initial)>
running thread  <Thread(Thread-15188, initial)>
running thread  <Thread(Thread-15189, initial)>
running thread  <Thread(Thread-15190, initial)>
running thread  <Thread(Thread-15191, initial)>
running thread  <Thread(Thread-15192, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 94.30it/s] 


current loss: [[2.41879885e+108]]
running thread  <Thread(Thread-15193, initial)>
running thread  <Thread(Thread-15194, initial)>
running thread  <Thread(Thread-15195, initial)>
running thread  <Thread(Thread-15196, initial)>
running thread  <Thread(Thread-15197, initial)>
running thread  <Thread(Thread-15198, initial)>
running thread  <Thread(Thread-15199, initial)>
running thread  <Thread(Thread-15200, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 105.96it/s]


current loss: [[5.34385229e+266]]
running thread  <Thread(Thread-15201, initial)>
running thread  <Thread(Thread-15202, initial)>
running thread  <Thread(Thread-15203, initial)>
running thread  <Thread(Thread-15204, initial)>
running thread  <Thread(Thread-15205, initial)>
running thread  <Thread(Thread-15206, initial)>
running thread  <Thread(Thread-15207, initial)>
running thread  <Thread(Thread-15208, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 123.86it/s]


current loss: [[7.22160175e+122]]
running thread  <Thread(Thread-15209, initial)>
running thread  <Thread(Thread-15210, initial)>
running thread  <Thread(Thread-15211, initial)>
running thread  <Thread(Thread-15212, initial)>
running thread  <Thread(Thread-15213, initial)>
running thread  <Thread(Thread-15214, initial)>
running thread  <Thread(Thread-15215, initial)>
running thread  <Thread(Thread-15216, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 101.36it/s]


current loss: [[nan]]
running thread  <Thread(Thread-15217, initial)>
running thread  <Thread(Thread-15218, initial)>
running thread  <Thread(Thread-15219, initial)>
running thread  <Thread(Thread-15220, initial)>
running thread  <Thread(Thread-15221, initial)>
running thread  <Thread(Thread-15222, initial)>
running thread  <Thread(Thread-15223, initial)>
running thread  <Thread(Thread-15224, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 117.50it/s]


current loss: [[2.32397459e+228]]
running thread  <Thread(Thread-15225, initial)>
running thread  <Thread(Thread-15226, initial)>
running thread  <Thread(Thread-15227, initial)>
running thread  <Thread(Thread-15228, initial)>
running thread  <Thread(Thread-15229, initial)>
running thread  <Thread(Thread-15230, initial)>
running thread  <Thread(Thread-15231, initial)>
running thread  <Thread(Thread-15232, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 114.25it/s]


current loss: [[nan]]
running thread  <Thread(Thread-15233, initial)>
running thread  <Thread(Thread-15234, initial)>
running thread  <Thread(Thread-15235, initial)>
running thread  <Thread(Thread-15236, initial)>
running thread  <Thread(Thread-15237, initial)>
running thread  <Thread(Thread-15238, initial)>
running thread  <Thread(Thread-15239, initial)>
running thread  <Thread(Thread-15240, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 96.23it/s]


current loss: [[2.36198965e+243]]
running thread  <Thread(Thread-15241, initial)>
running thread  <Thread(Thread-15242, initial)>
running thread  <Thread(Thread-15243, initial)>
running thread  <Thread(Thread-15244, initial)>
running thread  <Thread(Thread-15245, initial)>
running thread  <Thread(Thread-15246, initial)>
running thread  <Thread(Thread-15247, initial)>
running thread  <Thread(Thread-15248, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 115.96it/s]


current loss: [[1.35318677e+48]]
running thread  <Thread(Thread-15249, initial)>
running thread  <Thread(Thread-15250, initial)>
running thread  <Thread(Thread-15251, initial)>
running thread  <Thread(Thread-15252, initial)>
running thread  <Thread(Thread-15253, initial)>
running thread  <Thread(Thread-15254, initial)>
running thread  <Thread(Thread-15255, initial)>
running thread  <Thread(Thread-15256, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 113.34it/s]


current loss: [[inf]]
running thread  <Thread(Thread-15257, initial)>
running thread  <Thread(Thread-15258, initial)>
running thread  <Thread(Thread-15259, initial)>
running thread  <Thread(Thread-15260, initial)>
running thread  <Thread(Thread-15261, initial)>
running thread  <Thread(Thread-15262, initial)>
running thread  <Thread(Thread-15263, initial)>
running thread  <Thread(Thread-15264, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 104.90it/s]


current loss: [[nan]]
running thread  <Thread(Thread-15265, initial)>
running thread  <Thread(Thread-15266, initial)>
running thread  <Thread(Thread-15267, initial)>
running thread  <Thread(Thread-15268, initial)>
running thread  <Thread(Thread-15269, initial)>
running thread  <Thread(Thread-15270, initial)>
running thread  <Thread(Thread-15271, initial)>
running thread  <Thread(Thread-15272, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 95.49it/s]


current loss: [[47.98655236]]
running thread  <Thread(Thread-15273, initial)>
running thread  <Thread(Thread-15274, initial)>
running thread  <Thread(Thread-15275, initial)>
running thread  <Thread(Thread-15276, initial)>
running thread  <Thread(Thread-15277, initial)>
running thread  <Thread(Thread-15278, initial)>
running thread  <Thread(Thread-15279, initial)>
running thread  <Thread(Thread-15280, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 124.45it/s]


current loss: [[4.42362223e+194]]
running thread  <Thread(Thread-15281, initial)>
running thread  <Thread(Thread-15282, initial)>
running thread  <Thread(Thread-15283, initial)>
running thread  <Thread(Thread-15284, initial)>
running thread  <Thread(Thread-15285, initial)>
running thread  <Thread(Thread-15286, initial)>
running thread  <Thread(Thread-15287, initial)>
running thread  <Thread(Thread-15288, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 97.27it/s]


current loss: [[3.55805492e+210]]
running thread  <Thread(Thread-15289, initial)>
running thread  <Thread(Thread-15290, initial)>
running thread  <Thread(Thread-15291, initial)>
running thread  <Thread(Thread-15292, initial)>
running thread  <Thread(Thread-15293, initial)>
running thread  <Thread(Thread-15294, initial)>
running thread  <Thread(Thread-15295, initial)>
running thread  <Thread(Thread-15296, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 102.87it/s]


current loss: [[nan]]
running thread  <Thread(Thread-15297, initial)>
running thread  <Thread(Thread-15298, initial)>
running thread  <Thread(Thread-15299, initial)>
running thread  <Thread(Thread-15300, initial)>
running thread  <Thread(Thread-15301, initial)>
running thread  <Thread(Thread-15302, initial)>
running thread  <Thread(Thread-15303, initial)>
running thread  <Thread(Thread-15304, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 104.94it/s]


current loss: [[5.83504137e+70]]
running thread  <Thread(Thread-15305, initial)>
running thread  <Thread(Thread-15306, initial)>
running thread  <Thread(Thread-15307, initial)>
running thread  <Thread(Thread-15308, initial)>
running thread  <Thread(Thread-15309, initial)>
running thread  <Thread(Thread-15310, initial)>
running thread  <Thread(Thread-15311, initial)>
running thread  <Thread(Thread-15312, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 117.92it/s]


current loss: [[inf]]
running thread  <Thread(Thread-15313, initial)>
running thread  <Thread(Thread-15314, initial)>
running thread  <Thread(Thread-15315, initial)>
running thread  <Thread(Thread-15316, initial)>
running thread  <Thread(Thread-15317, initial)>
running thread  <Thread(Thread-15318, initial)>
running thread  <Thread(Thread-15319, initial)>
running thread  <Thread(Thread-15320, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 119.61it/s]


current loss: [[8.41827863e+48]]
running thread  <Thread(Thread-15321, initial)>
running thread  <Thread(Thread-15322, initial)>
running thread  <Thread(Thread-15323, initial)>
running thread  <Thread(Thread-15324, initial)>
running thread  <Thread(Thread-15325, initial)>
running thread  <Thread(Thread-15326, initial)>
running thread  <Thread(Thread-15327, initial)>
running thread  <Thread(Thread-15328, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 102.33it/s]


current loss: [[264.37605812]]
running thread  <Thread(Thread-15329, initial)>
running thread  <Thread(Thread-15330, initial)>
running thread  <Thread(Thread-15331, initial)>
running thread  <Thread(Thread-15332, initial)>
running thread  <Thread(Thread-15333, initial)>
running thread  <Thread(Thread-15334, initial)>
running thread  <Thread(Thread-15335, initial)>
running thread  <Thread(Thread-15336, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 123.03it/s]


current loss: [[4.86422407e+229]]
running thread  <Thread(Thread-15337, initial)>
running thread  <Thread(Thread-15338, initial)>
running thread  <Thread(Thread-15339, initial)>
running thread  <Thread(Thread-15340, initial)>
running thread  <Thread(Thread-15341, initial)>
running thread  <Thread(Thread-15342, initial)>
running thread  <Thread(Thread-15343, initial)>
running thread  <Thread(Thread-15344, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 110.08it/s]


current loss: [[3.80420777e+69]]
running thread  <Thread(Thread-15345, initial)>
running thread  <Thread(Thread-15346, initial)>
running thread  <Thread(Thread-15347, initial)>
running thread  <Thread(Thread-15348, initial)>
running thread  <Thread(Thread-15349, initial)>
running thread  <Thread(Thread-15350, initial)>
running thread  <Thread(Thread-15351, initial)>
running thread  <Thread(Thread-15352, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 126.17it/s]


current loss: [[1.98030129e+286]]
running thread  <Thread(Thread-15353, initial)>
running thread  <Thread(Thread-15354, initial)>
running thread  <Thread(Thread-15355, initial)>
running thread  <Thread(Thread-15356, initial)>
running thread  <Thread(Thread-15357, initial)>
running thread  <Thread(Thread-15358, initial)>
running thread  <Thread(Thread-15359, initial)>
running thread  <Thread(Thread-15360, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 117.07it/s]


current loss: [[inf]]
running thread  <Thread(Thread-15361, initial)>
running thread  <Thread(Thread-15362, initial)>
running thread  <Thread(Thread-15363, initial)>
running thread  <Thread(Thread-15364, initial)>
running thread  <Thread(Thread-15365, initial)>
running thread  <Thread(Thread-15366, initial)>
running thread  <Thread(Thread-15367, initial)>
running thread  <Thread(Thread-15368, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 107.02it/s]


current loss: [[nan]]
running thread  <Thread(Thread-15369, initial)>
running thread  <Thread(Thread-15370, initial)>
running thread  <Thread(Thread-15371, initial)>
running thread  <Thread(Thread-15372, initial)>
running thread  <Thread(Thread-15373, initial)>
running thread  <Thread(Thread-15374, initial)>
running thread  <Thread(Thread-15375, initial)>
running thread  <Thread(Thread-15376, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 119.25it/s]


current loss: [[inf]]
running thread  <Thread(Thread-15377, initial)>
running thread  <Thread(Thread-15378, initial)>
running thread  <Thread(Thread-15379, initial)>
running thread  <Thread(Thread-15380, initial)>
running thread  <Thread(Thread-15381, initial)>
running thread  <Thread(Thread-15382, initial)>
running thread  <Thread(Thread-15383, initial)>
running thread  <Thread(Thread-15384, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 89.23it/s]


current loss: [[inf]]
running thread  <Thread(Thread-15385, initial)>
running thread  <Thread(Thread-15386, initial)>
running thread  <Thread(Thread-15387, initial)>
running thread  <Thread(Thread-15388, initial)>
running thread  <Thread(Thread-15389, initial)>
running thread  <Thread(Thread-15390, initial)>
running thread  <Thread(Thread-15391, initial)>
running thread  <Thread(Thread-15392, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 111.41it/s]


current loss: [[4.36556644e+186]]
running thread  <Thread(Thread-15393, initial)>
running thread  <Thread(Thread-15394, initial)>
running thread  <Thread(Thread-15395, initial)>
running thread  <Thread(Thread-15396, initial)>
running thread  <Thread(Thread-15397, initial)>
running thread  <Thread(Thread-15398, initial)>
running thread  <Thread(Thread-15399, initial)>
running thread  <Thread(Thread-15400, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 119.72it/s]


current loss: [[nan]]
running thread  <Thread(Thread-15401, initial)>
running thread  <Thread(Thread-15402, initial)>
running thread  <Thread(Thread-15403, initial)>
running thread  <Thread(Thread-15404, initial)>
running thread  <Thread(Thread-15405, initial)>
running thread  <Thread(Thread-15406, initial)>
running thread  <Thread(Thread-15407, initial)>
running thread  <Thread(Thread-15408, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 106.82it/s]


current loss: [[1.11426915e+285]]
running thread  <Thread(Thread-15409, initial)>
running thread  <Thread(Thread-15410, initial)>
running thread  <Thread(Thread-15411, initial)>
running thread  <Thread(Thread-15412, initial)>
running thread  <Thread(Thread-15413, initial)>
running thread  <Thread(Thread-15414, initial)>
running thread  <Thread(Thread-15415, initial)>
running thread  <Thread(Thread-15416, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 99.72it/s] 


current loss: [[8.9383176e+82]]
running thread  <Thread(Thread-15417, initial)>
running thread  <Thread(Thread-15418, initial)>
running thread  <Thread(Thread-15419, initial)>
running thread  <Thread(Thread-15420, initial)>
running thread  <Thread(Thread-15421, initial)>
running thread  <Thread(Thread-15422, initial)>
running thread  <Thread(Thread-15423, initial)>
running thread  <Thread(Thread-15424, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 113.48it/s]


current loss: [[3.30714429e+196]]
running thread  <Thread(Thread-15425, initial)>
running thread  <Thread(Thread-15426, initial)>
running thread  <Thread(Thread-15427, initial)>
running thread  <Thread(Thread-15428, initial)>
running thread  <Thread(Thread-15429, initial)>
running thread  <Thread(Thread-15430, initial)>
running thread  <Thread(Thread-15431, initial)>
running thread  <Thread(Thread-15432, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 90.49it/s]


current loss: [[1.68873562e+249]]
running thread  <Thread(Thread-15433, initial)>
running thread  <Thread(Thread-15434, initial)>
running thread  <Thread(Thread-15435, initial)>
running thread  <Thread(Thread-15436, initial)>
running thread  <Thread(Thread-15437, initial)>
running thread  <Thread(Thread-15438, initial)>
running thread  <Thread(Thread-15439, initial)>
running thread  <Thread(Thread-15440, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 102.92it/s]


current loss: [[2.98074643e+94]]
running thread  <Thread(Thread-15441, initial)>
running thread  <Thread(Thread-15442, initial)>
running thread  <Thread(Thread-15443, initial)>
running thread  <Thread(Thread-15444, initial)>
running thread  <Thread(Thread-15445, initial)>
running thread  <Thread(Thread-15446, initial)>
running thread  <Thread(Thread-15447, initial)>
running thread  <Thread(Thread-15448, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 103.30it/s]


current loss: [[1.19723481e+293]]
running thread  <Thread(Thread-15449, initial)>
running thread  <Thread(Thread-15450, initial)>
running thread  <Thread(Thread-15451, initial)>
running thread  <Thread(Thread-15452, initial)>
running thread  <Thread(Thread-15453, initial)>
running thread  <Thread(Thread-15454, initial)>
running thread  <Thread(Thread-15455, initial)>
running thread  <Thread(Thread-15456, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 98.34it/s]


current loss: [[inf]]
running thread  <Thread(Thread-15457, initial)>
running thread  <Thread(Thread-15458, initial)>
running thread  <Thread(Thread-15459, initial)>
running thread  <Thread(Thread-15460, initial)>
running thread  <Thread(Thread-15461, initial)>
running thread  <Thread(Thread-15462, initial)>
running thread  <Thread(Thread-15463, initial)>
running thread  <Thread(Thread-15464, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 105.66it/s]


current loss: [[4.52218218e+273]]
running thread  <Thread(Thread-15465, initial)>
running thread  <Thread(Thread-15466, initial)>
running thread  <Thread(Thread-15467, initial)>
running thread  <Thread(Thread-15468, initial)>
running thread  <Thread(Thread-15469, initial)>
running thread  <Thread(Thread-15470, initial)>
running thread  <Thread(Thread-15471, initial)>
running thread  <Thread(Thread-15472, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 98.74it/s]


current loss: [[inf]]
running thread  <Thread(Thread-15473, initial)>
running thread  <Thread(Thread-15474, initial)>
running thread  <Thread(Thread-15475, initial)>
running thread  <Thread(Thread-15476, initial)>
running thread  <Thread(Thread-15477, initial)>
running thread  <Thread(Thread-15478, initial)>
running thread  <Thread(Thread-15479, initial)>
running thread  <Thread(Thread-15480, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 114.55it/s]


current loss: [[1.05308732e+141]]
running thread  <Thread(Thread-15481, initial)>
running thread  <Thread(Thread-15482, initial)>
running thread  <Thread(Thread-15483, initial)>
running thread  <Thread(Thread-15484, initial)>
running thread  <Thread(Thread-15485, initial)>
running thread  <Thread(Thread-15486, initial)>
running thread  <Thread(Thread-15487, initial)>
running thread  <Thread(Thread-15488, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 94.66it/s]


current loss: [[1314.39785517]]
running thread  <Thread(Thread-15489, initial)>
running thread  <Thread(Thread-15490, initial)>
running thread  <Thread(Thread-15491, initial)>
running thread  <Thread(Thread-15492, initial)>
running thread  <Thread(Thread-15493, initial)>
running thread  <Thread(Thread-15494, initial)>
running thread  <Thread(Thread-15495, initial)>
running thread  <Thread(Thread-15496, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 101.54it/s]


current loss: [[7.28205055e+268]]
running thread  <Thread(Thread-15497, initial)>
running thread  <Thread(Thread-15498, initial)>
running thread  <Thread(Thread-15499, initial)>
running thread  <Thread(Thread-15500, initial)>
running thread  <Thread(Thread-15501, initial)>
running thread  <Thread(Thread-15502, initial)>
running thread  <Thread(Thread-15503, initial)>
running thread  <Thread(Thread-15504, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 98.61it/s]


current loss: [[nan]]
running thread  <Thread(Thread-15505, initial)>
running thread  <Thread(Thread-15506, initial)>
running thread  <Thread(Thread-15507, initial)>
running thread  <Thread(Thread-15508, initial)>
running thread  <Thread(Thread-15509, initial)>
running thread  <Thread(Thread-15510, initial)>
running thread  <Thread(Thread-15511, initial)>
running thread  <Thread(Thread-15512, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 115.84it/s]


current loss: [[3.63448243e+277]]
running thread  <Thread(Thread-15513, initial)>
running thread  <Thread(Thread-15514, initial)>
running thread  <Thread(Thread-15515, initial)>
running thread  <Thread(Thread-15516, initial)>
running thread  <Thread(Thread-15517, initial)>
running thread  <Thread(Thread-15518, initial)>
running thread  <Thread(Thread-15519, initial)>
running thread  <Thread(Thread-15520, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 91.14it/s]


current loss: [[94.13038241]]
running thread  <Thread(Thread-15521, initial)>
running thread  <Thread(Thread-15522, initial)>
running thread  <Thread(Thread-15523, initial)>
running thread  <Thread(Thread-15524, initial)>
running thread  <Thread(Thread-15525, initial)>
running thread  <Thread(Thread-15526, initial)>
running thread  <Thread(Thread-15527, initial)>
running thread  <Thread(Thread-15528, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 96.90it/s]


current loss: [[nan]]
running thread  <Thread(Thread-15529, initial)>
running thread  <Thread(Thread-15530, initial)>
running thread  <Thread(Thread-15531, initial)>
running thread  <Thread(Thread-15532, initial)>
running thread  <Thread(Thread-15533, initial)>
running thread  <Thread(Thread-15534, initial)>
running thread  <Thread(Thread-15535, initial)>
running thread  <Thread(Thread-15536, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 108.08it/s]


current loss: [[nan]]
running thread  <Thread(Thread-15537, initial)>
running thread  <Thread(Thread-15538, initial)>
running thread  <Thread(Thread-15539, initial)>
running thread  <Thread(Thread-15540, initial)>
running thread  <Thread(Thread-15541, initial)>
running thread  <Thread(Thread-15542, initial)>
running thread  <Thread(Thread-15543, initial)>
running thread  <Thread(Thread-15544, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 106.71it/s]


current loss: [[nan]]
running thread  <Thread(Thread-15545, initial)>
running thread  <Thread(Thread-15546, initial)>
running thread  <Thread(Thread-15547, initial)>
running thread  <Thread(Thread-15548, initial)>
running thread  <Thread(Thread-15549, initial)>
running thread  <Thread(Thread-15550, initial)>
running thread  <Thread(Thread-15551, initial)>
running thread  <Thread(Thread-15552, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 96.56it/s]


current loss: [[1.41179134e+87]]
running thread  <Thread(Thread-15553, initial)>
running thread  <Thread(Thread-15554, initial)>
running thread  <Thread(Thread-15555, initial)>
running thread  <Thread(Thread-15556, initial)>
running thread  <Thread(Thread-15557, initial)>
running thread  <Thread(Thread-15558, initial)>
running thread  <Thread(Thread-15559, initial)>
running thread  <Thread(Thread-15560, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 88.76it/s]


current loss: [[7.09315625e+189]]
running thread  <Thread(Thread-15561, initial)>
running thread  <Thread(Thread-15562, initial)>
running thread  <Thread(Thread-15563, initial)>
running thread  <Thread(Thread-15564, initial)>
running thread  <Thread(Thread-15565, initial)>
running thread  <Thread(Thread-15566, initial)>
running thread  <Thread(Thread-15567, initial)>
running thread  <Thread(Thread-15568, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 113.56it/s]


current loss: [[nan]]
running thread  <Thread(Thread-15569, initial)>
running thread  <Thread(Thread-15570, initial)>
running thread  <Thread(Thread-15571, initial)>
running thread  <Thread(Thread-15572, initial)>
running thread  <Thread(Thread-15573, initial)>
running thread  <Thread(Thread-15574, initial)>
running thread  <Thread(Thread-15575, initial)>
running thread  <Thread(Thread-15576, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 104.86it/s]


current loss: [[1.54969582e+245]]
running thread  <Thread(Thread-15577, initial)>
running thread  <Thread(Thread-15578, initial)>
running thread  <Thread(Thread-15579, initial)>
running thread  <Thread(Thread-15580, initial)>
running thread  <Thread(Thread-15581, initial)>
running thread  <Thread(Thread-15582, initial)>
running thread  <Thread(Thread-15583, initial)>
running thread  <Thread(Thread-15584, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 96.60it/s] 


current loss: [[inf]]
running thread  <Thread(Thread-15585, initial)>
running thread  <Thread(Thread-15586, initial)>
running thread  <Thread(Thread-15587, initial)>
running thread  <Thread(Thread-15588, initial)>
running thread  <Thread(Thread-15589, initial)>
running thread  <Thread(Thread-15590, initial)>
running thread  <Thread(Thread-15591, initial)>
running thread  <Thread(Thread-15592, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 113.17it/s]


current loss: [[inf]]
running thread  <Thread(Thread-15593, initial)>
running thread  <Thread(Thread-15594, initial)>
running thread  <Thread(Thread-15595, initial)>
running thread  <Thread(Thread-15596, initial)>
running thread  <Thread(Thread-15597, initial)>
running thread  <Thread(Thread-15598, initial)>
running thread  <Thread(Thread-15599, initial)>
running thread  <Thread(Thread-15600, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 110.91it/s]


current loss: [[inf]]
running thread  <Thread(Thread-15601, initial)>
running thread  <Thread(Thread-15602, initial)>
running thread  <Thread(Thread-15603, initial)>
running thread  <Thread(Thread-15604, initial)>
running thread  <Thread(Thread-15605, initial)>
running thread  <Thread(Thread-15606, initial)>
running thread  <Thread(Thread-15607, initial)>
running thread  <Thread(Thread-15608, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 112.66it/s]


current loss: [[nan]]
running thread  <Thread(Thread-15609, initial)>
running thread  <Thread(Thread-15610, initial)>
running thread  <Thread(Thread-15611, initial)>
running thread  <Thread(Thread-15612, initial)>
running thread  <Thread(Thread-15613, initial)>
running thread  <Thread(Thread-15614, initial)>
running thread  <Thread(Thread-15615, initial)>
running thread  <Thread(Thread-15616, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 97.01it/s]


current loss: [[4.51370077e+199]]
running thread  <Thread(Thread-15617, initial)>
running thread  <Thread(Thread-15618, initial)>
running thread  <Thread(Thread-15619, initial)>
running thread  <Thread(Thread-15620, initial)>
running thread  <Thread(Thread-15621, initial)>
running thread  <Thread(Thread-15622, initial)>
running thread  <Thread(Thread-15623, initial)>
running thread  <Thread(Thread-15624, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 105.27it/s]


current loss: [[nan]]
running thread  <Thread(Thread-15625, initial)>
running thread  <Thread(Thread-15626, initial)>
running thread  <Thread(Thread-15627, initial)>
running thread  <Thread(Thread-15628, initial)>
running thread  <Thread(Thread-15629, initial)>
running thread  <Thread(Thread-15630, initial)>
running thread  <Thread(Thread-15631, initial)>
running thread  <Thread(Thread-15632, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 107.07it/s]


current loss: [[4.36362534e+288]]
running thread  <Thread(Thread-15633, initial)>
running thread  <Thread(Thread-15634, initial)>
running thread  <Thread(Thread-15635, initial)>
running thread  <Thread(Thread-15636, initial)>
running thread  <Thread(Thread-15637, initial)>
running thread  <Thread(Thread-15638, initial)>
running thread  <Thread(Thread-15639, initial)>
running thread  <Thread(Thread-15640, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 111.29it/s]


current loss: [[nan]]
running thread  <Thread(Thread-15641, initial)>
running thread  <Thread(Thread-15642, initial)>
running thread  <Thread(Thread-15643, initial)>
running thread  <Thread(Thread-15644, initial)>
running thread  <Thread(Thread-15645, initial)>
running thread  <Thread(Thread-15646, initial)>
running thread  <Thread(Thread-15647, initial)>
running thread  <Thread(Thread-15648, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 116.13it/s]


current loss: [[inf]]
running thread  <Thread(Thread-15649, initial)>
running thread  <Thread(Thread-15650, initial)>
running thread  <Thread(Thread-15651, initial)>
running thread  <Thread(Thread-15652, initial)>
running thread  <Thread(Thread-15653, initial)>
running thread  <Thread(Thread-15654, initial)>
running thread  <Thread(Thread-15655, initial)>
running thread  <Thread(Thread-15656, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 111.35it/s]


current loss: [[nan]]
running thread  <Thread(Thread-15657, initial)>
running thread  <Thread(Thread-15658, initial)>
running thread  <Thread(Thread-15659, initial)>
running thread  <Thread(Thread-15660, initial)>
running thread  <Thread(Thread-15661, initial)>
running thread  <Thread(Thread-15662, initial)>
running thread  <Thread(Thread-15663, initial)>
running thread  <Thread(Thread-15664, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 109.70it/s]


current loss: [[3.61214559e+230]]
running thread  <Thread(Thread-15665, initial)>
running thread  <Thread(Thread-15666, initial)>
running thread  <Thread(Thread-15667, initial)>
running thread  <Thread(Thread-15668, initial)>
running thread  <Thread(Thread-15669, initial)>
running thread  <Thread(Thread-15670, initial)>
running thread  <Thread(Thread-15671, initial)>
running thread  <Thread(Thread-15672, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 104.54it/s]


current loss: [[3.37192538e+220]]
running thread  <Thread(Thread-15673, initial)>
running thread  <Thread(Thread-15674, initial)>
running thread  <Thread(Thread-15675, initial)>
running thread  <Thread(Thread-15676, initial)>
running thread  <Thread(Thread-15677, initial)>
running thread  <Thread(Thread-15678, initial)>
running thread  <Thread(Thread-15679, initial)>
running thread  <Thread(Thread-15680, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 108.37it/s]


current loss: [[inf]]
running thread  <Thread(Thread-15681, initial)>
running thread  <Thread(Thread-15682, initial)>
running thread  <Thread(Thread-15683, initial)>
running thread  <Thread(Thread-15684, initial)>
running thread  <Thread(Thread-15685, initial)>
running thread  <Thread(Thread-15686, initial)>
running thread  <Thread(Thread-15687, initial)>
running thread  <Thread(Thread-15688, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 97.14it/s]


current loss: [[nan]]
running thread  <Thread(Thread-15689, initial)>
running thread  <Thread(Thread-15690, initial)>
running thread  <Thread(Thread-15691, initial)>
running thread  <Thread(Thread-15692, initial)>
running thread  <Thread(Thread-15693, initial)>
running thread  <Thread(Thread-15694, initial)>
running thread  <Thread(Thread-15695, initial)>
running thread  <Thread(Thread-15696, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 123.06it/s]


current loss: [[2.5069424e+307]]
running thread  <Thread(Thread-15697, initial)>
running thread  <Thread(Thread-15698, initial)>
running thread  <Thread(Thread-15699, initial)>
running thread  <Thread(Thread-15700, initial)>
running thread  <Thread(Thread-15701, initial)>
running thread  <Thread(Thread-15702, initial)>
running thread  <Thread(Thread-15703, initial)>
running thread  <Thread(Thread-15704, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 102.44it/s]


current loss: [[2.21089766e+138]]
running thread  <Thread(Thread-15705, initial)>
running thread  <Thread(Thread-15706, initial)>
running thread  <Thread(Thread-15707, initial)>
running thread  <Thread(Thread-15708, initial)>
running thread  <Thread(Thread-15709, initial)>
running thread  <Thread(Thread-15710, initial)>
running thread  <Thread(Thread-15711, initial)>
running thread  <Thread(Thread-15712, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 100.89it/s]


current loss: [[inf]]
running thread  <Thread(Thread-15713, initial)>
running thread  <Thread(Thread-15714, initial)>
running thread  <Thread(Thread-15715, initial)>
running thread  <Thread(Thread-15716, initial)>
running thread  <Thread(Thread-15717, initial)>
running thread  <Thread(Thread-15718, initial)>
running thread  <Thread(Thread-15719, initial)>
running thread  <Thread(Thread-15720, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 97.87it/s] 


current loss: [[nan]]
running thread  <Thread(Thread-15721, initial)>
running thread  <Thread(Thread-15722, initial)>
running thread  <Thread(Thread-15723, initial)>
running thread  <Thread(Thread-15724, initial)>
running thread  <Thread(Thread-15725, initial)>
running thread  <Thread(Thread-15726, initial)>
running thread  <Thread(Thread-15727, initial)>
running thread  <Thread(Thread-15728, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 101.53it/s]


current loss: [[9.72575359e+200]]
running thread  <Thread(Thread-15729, initial)>
running thread  <Thread(Thread-15730, initial)>
running thread  <Thread(Thread-15731, initial)>
running thread  <Thread(Thread-15732, initial)>
running thread  <Thread(Thread-15733, initial)>
running thread  <Thread(Thread-15734, initial)>
running thread  <Thread(Thread-15735, initial)>
running thread  <Thread(Thread-15736, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 123.06it/s]


current loss: [[2.98236852e+158]]
running thread  <Thread(Thread-15737, initial)>
running thread  <Thread(Thread-15738, initial)>
running thread  <Thread(Thread-15739, initial)>
running thread  <Thread(Thread-15740, initial)>
running thread  <Thread(Thread-15741, initial)>
running thread  <Thread(Thread-15742, initial)>
running thread  <Thread(Thread-15743, initial)>
running thread  <Thread(Thread-15744, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 98.11it/s]


current loss: [[inf]]
running thread  <Thread(Thread-15745, initial)>
running thread  <Thread(Thread-15746, initial)>
running thread  <Thread(Thread-15747, initial)>
running thread  <Thread(Thread-15748, initial)>
running thread  <Thread(Thread-15749, initial)>
running thread  <Thread(Thread-15750, initial)>
running thread  <Thread(Thread-15751, initial)>
running thread  <Thread(Thread-15752, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 110.14it/s]


current loss: [[nan]]
running thread  <Thread(Thread-15753, initial)>
running thread  <Thread(Thread-15754, initial)>
running thread  <Thread(Thread-15755, initial)>
running thread  <Thread(Thread-15756, initial)>
running thread  <Thread(Thread-15757, initial)>
running thread  <Thread(Thread-15758, initial)>
running thread  <Thread(Thread-15759, initial)>
running thread  <Thread(Thread-15760, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 97.59it/s]


current loss: [[4.03331901e+191]]
running thread  <Thread(Thread-15761, initial)>
running thread  <Thread(Thread-15762, initial)>
running thread  <Thread(Thread-15763, initial)>
running thread  <Thread(Thread-15764, initial)>
running thread  <Thread(Thread-15765, initial)>
running thread  <Thread(Thread-15766, initial)>
running thread  <Thread(Thread-15767, initial)>
running thread  <Thread(Thread-15768, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 112.06it/s]


current loss: [[nan]]
running thread  <Thread(Thread-15769, initial)>
running thread  <Thread(Thread-15770, initial)>
running thread  <Thread(Thread-15771, initial)>
running thread  <Thread(Thread-15772, initial)>
running thread  <Thread(Thread-15773, initial)>
running thread  <Thread(Thread-15774, initial)>
running thread  <Thread(Thread-15775, initial)>
running thread  <Thread(Thread-15776, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 98.37it/s]


current loss: [[inf]]
running thread  <Thread(Thread-15777, initial)>
running thread  <Thread(Thread-15778, initial)>
running thread  <Thread(Thread-15779, initial)>
running thread  <Thread(Thread-15780, initial)>
running thread  <Thread(Thread-15781, initial)>
running thread  <Thread(Thread-15782, initial)>
running thread  <Thread(Thread-15783, initial)>
running thread  <Thread(Thread-15784, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 103.56it/s]


current loss: [[inf]]
running thread  <Thread(Thread-15785, initial)>
running thread  <Thread(Thread-15786, initial)>
running thread  <Thread(Thread-15787, initial)>
running thread  <Thread(Thread-15788, initial)>
running thread  <Thread(Thread-15789, initial)>
running thread  <Thread(Thread-15790, initial)>
running thread  <Thread(Thread-15791, initial)>
running thread  <Thread(Thread-15792, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 94.95it/s]


current loss: [[1.22024717e+210]]
running thread  <Thread(Thread-15793, initial)>
running thread  <Thread(Thread-15794, initial)>
running thread  <Thread(Thread-15795, initial)>
running thread  <Thread(Thread-15796, initial)>
running thread  <Thread(Thread-15797, initial)>
running thread  <Thread(Thread-15798, initial)>
running thread  <Thread(Thread-15799, initial)>
running thread  <Thread(Thread-15800, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 111.96it/s]


current loss: [[1.05153057e+229]]
running thread  <Thread(Thread-15801, initial)>
running thread  <Thread(Thread-15802, initial)>
running thread  <Thread(Thread-15803, initial)>
running thread  <Thread(Thread-15804, initial)>
running thread  <Thread(Thread-15805, initial)>
running thread  <Thread(Thread-15806, initial)>
running thread  <Thread(Thread-15807, initial)>
running thread  <Thread(Thread-15808, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 110.97it/s]


current loss: [[2.11634519e+107]]
running thread  <Thread(Thread-15809, initial)>
running thread  <Thread(Thread-15810, initial)>
running thread  <Thread(Thread-15811, initial)>
running thread  <Thread(Thread-15812, initial)>
running thread  <Thread(Thread-15813, initial)>
running thread  <Thread(Thread-15814, initial)>
running thread  <Thread(Thread-15815, initial)>
running thread  <Thread(Thread-15816, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 116.48it/s]


current loss: [[1.9437942e+218]]
running thread  <Thread(Thread-15817, initial)>
running thread  <Thread(Thread-15818, initial)>
running thread  <Thread(Thread-15819, initial)>
running thread  <Thread(Thread-15820, initial)>
running thread  <Thread(Thread-15821, initial)>
running thread  <Thread(Thread-15822, initial)>
running thread  <Thread(Thread-15823, initial)>
running thread  <Thread(Thread-15824, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 105.13it/s]


current loss: [[8.42184844e+24]]
running thread  <Thread(Thread-15825, initial)>
running thread  <Thread(Thread-15826, initial)>
running thread  <Thread(Thread-15827, initial)>
running thread  <Thread(Thread-15828, initial)>
running thread  <Thread(Thread-15829, initial)>
running thread  <Thread(Thread-15830, initial)>
running thread  <Thread(Thread-15831, initial)>
running thread  <Thread(Thread-15832, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 103.69it/s]


current loss: [[inf]]
running thread  <Thread(Thread-15833, initial)>
running thread  <Thread(Thread-15834, initial)>
running thread  <Thread(Thread-15835, initial)>
running thread  <Thread(Thread-15836, initial)>
running thread  <Thread(Thread-15837, initial)>
running thread  <Thread(Thread-15838, initial)>
running thread  <Thread(Thread-15839, initial)>
running thread  <Thread(Thread-15840, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 105.26it/s]


current loss: [[4.93129633e+279]]
running thread  <Thread(Thread-15841, initial)>
running thread  <Thread(Thread-15842, initial)>
running thread  <Thread(Thread-15843, initial)>
running thread  <Thread(Thread-15844, initial)>
running thread  <Thread(Thread-15845, initial)>
running thread  <Thread(Thread-15846, initial)>
running thread  <Thread(Thread-15847, initial)>
running thread  <Thread(Thread-15848, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 98.19it/s]


current loss: [[nan]]
running thread  <Thread(Thread-15849, initial)>
running thread  <Thread(Thread-15850, initial)>
running thread  <Thread(Thread-15851, initial)>
running thread  <Thread(Thread-15852, initial)>
running thread  <Thread(Thread-15853, initial)>
running thread  <Thread(Thread-15854, initial)>
running thread  <Thread(Thread-15855, initial)>
running thread  <Thread(Thread-15856, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 104.30it/s]


current loss: [[inf]]
running thread  <Thread(Thread-15857, initial)>
running thread  <Thread(Thread-15858, initial)>
running thread  <Thread(Thread-15859, initial)>
running thread  <Thread(Thread-15860, initial)>
running thread  <Thread(Thread-15861, initial)>
running thread  <Thread(Thread-15862, initial)>
running thread  <Thread(Thread-15863, initial)>
running thread  <Thread(Thread-15864, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 105.76it/s]


current loss: [[5.71899555e+307]]
running thread  <Thread(Thread-15865, initial)>
running thread  <Thread(Thread-15866, initial)>
running thread  <Thread(Thread-15867, initial)>
running thread  <Thread(Thread-15868, initial)>
running thread  <Thread(Thread-15869, initial)>
running thread  <Thread(Thread-15870, initial)>
running thread  <Thread(Thread-15871, initial)>
running thread  <Thread(Thread-15872, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 96.67it/s] 


current loss: [[inf]]
running thread  <Thread(Thread-15873, initial)>
running thread  <Thread(Thread-15874, initial)>
running thread  <Thread(Thread-15875, initial)>
running thread  <Thread(Thread-15876, initial)>
running thread  <Thread(Thread-15877, initial)>
running thread  <Thread(Thread-15878, initial)>
running thread  <Thread(Thread-15879, initial)>
running thread  <Thread(Thread-15880, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 97.05it/s] 


current loss: [[inf]]
running thread  <Thread(Thread-15881, initial)>
running thread  <Thread(Thread-15882, initial)>
running thread  <Thread(Thread-15883, initial)>
running thread  <Thread(Thread-15884, initial)>
running thread  <Thread(Thread-15885, initial)>
running thread  <Thread(Thread-15886, initial)>
running thread  <Thread(Thread-15887, initial)>
running thread  <Thread(Thread-15888, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 114.50it/s]


current loss: [[nan]]
running thread  <Thread(Thread-15889, initial)>
running thread  <Thread(Thread-15890, initial)>
running thread  <Thread(Thread-15891, initial)>
running thread  <Thread(Thread-15892, initial)>
running thread  <Thread(Thread-15893, initial)>
running thread  <Thread(Thread-15894, initial)>
running thread  <Thread(Thread-15895, initial)>
running thread  <Thread(Thread-15896, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 96.88it/s]


current loss: [[3.86568163e+51]]
running thread  <Thread(Thread-15897, initial)>
running thread  <Thread(Thread-15898, initial)>
running thread  <Thread(Thread-15899, initial)>
running thread  <Thread(Thread-15900, initial)>
running thread  <Thread(Thread-15901, initial)>
running thread  <Thread(Thread-15902, initial)>
running thread  <Thread(Thread-15903, initial)>
running thread  <Thread(Thread-15904, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 110.28it/s]


current loss: [[inf]]
running thread  <Thread(Thread-15905, initial)>
running thread  <Thread(Thread-15906, initial)>
running thread  <Thread(Thread-15907, initial)>
running thread  <Thread(Thread-15908, initial)>
running thread  <Thread(Thread-15909, initial)>
running thread  <Thread(Thread-15910, initial)>
running thread  <Thread(Thread-15911, initial)>
running thread  <Thread(Thread-15912, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 113.34it/s]


current loss: [[1.62598493e+187]]
running thread  <Thread(Thread-15913, initial)>
running thread  <Thread(Thread-15914, initial)>
running thread  <Thread(Thread-15915, initial)>
running thread  <Thread(Thread-15916, initial)>
running thread  <Thread(Thread-15917, initial)>
running thread  <Thread(Thread-15918, initial)>
running thread  <Thread(Thread-15919, initial)>
running thread  <Thread(Thread-15920, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 123.83it/s]


current loss: [[inf]]
running thread  <Thread(Thread-15921, initial)>
running thread  <Thread(Thread-15922, initial)>
running thread  <Thread(Thread-15923, initial)>
running thread  <Thread(Thread-15924, initial)>
running thread  <Thread(Thread-15925, initial)>
running thread  <Thread(Thread-15926, initial)>
running thread  <Thread(Thread-15927, initial)>
running thread  <Thread(Thread-15928, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 92.89it/s]


current loss: [[1.12945105e+161]]
running thread  <Thread(Thread-15929, initial)>
running thread  <Thread(Thread-15930, initial)>
running thread  <Thread(Thread-15931, initial)>
running thread  <Thread(Thread-15932, initial)>
running thread  <Thread(Thread-15933, initial)>
running thread  <Thread(Thread-15934, initial)>
running thread  <Thread(Thread-15935, initial)>
running thread  <Thread(Thread-15936, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 121.03it/s]


current loss: [[inf]]
running thread  <Thread(Thread-15937, initial)>
running thread  <Thread(Thread-15938, initial)>
running thread  <Thread(Thread-15939, initial)>
running thread  <Thread(Thread-15940, initial)>
running thread  <Thread(Thread-15941, initial)>
running thread  <Thread(Thread-15942, initial)>
running thread  <Thread(Thread-15943, initial)>
running thread  <Thread(Thread-15944, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 99.91it/s]


current loss: [[inf]]
running thread  <Thread(Thread-15945, initial)>
running thread  <Thread(Thread-15946, initial)>
running thread  <Thread(Thread-15947, initial)>
running thread  <Thread(Thread-15948, initial)>
running thread  <Thread(Thread-15949, initial)>
running thread  <Thread(Thread-15950, initial)>
running thread  <Thread(Thread-15951, initial)>
running thread  <Thread(Thread-15952, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 105.07it/s]


current loss: [[inf]]
running thread  <Thread(Thread-15953, initial)>
running thread  <Thread(Thread-15954, initial)>
running thread  <Thread(Thread-15955, initial)>
running thread  <Thread(Thread-15956, initial)>
running thread  <Thread(Thread-15957, initial)>
running thread  <Thread(Thread-15958, initial)>
running thread  <Thread(Thread-15959, initial)>
running thread  <Thread(Thread-15960, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 114.66it/s]


current loss: [[1.50584152e+255]]
running thread  <Thread(Thread-15961, initial)>
running thread  <Thread(Thread-15962, initial)>
running thread  <Thread(Thread-15963, initial)>
running thread  <Thread(Thread-15964, initial)>
running thread  <Thread(Thread-15965, initial)>
running thread  <Thread(Thread-15966, initial)>
running thread  <Thread(Thread-15967, initial)>
running thread  <Thread(Thread-15968, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 125.33it/s]


current loss: [[inf]]
running thread  <Thread(Thread-15969, initial)>
running thread  <Thread(Thread-15970, initial)>
running thread  <Thread(Thread-15971, initial)>
running thread  <Thread(Thread-15972, initial)>
running thread  <Thread(Thread-15973, initial)>
running thread  <Thread(Thread-15974, initial)>
running thread  <Thread(Thread-15975, initial)>
running thread  <Thread(Thread-15976, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 110.97it/s]


current loss: [[2.46337234e+209]]
running thread  <Thread(Thread-15977, initial)>
running thread  <Thread(Thread-15978, initial)>
running thread  <Thread(Thread-15979, initial)>
running thread  <Thread(Thread-15980, initial)>
running thread  <Thread(Thread-15981, initial)>
running thread  <Thread(Thread-15982, initial)>
running thread  <Thread(Thread-15983, initial)>
running thread  <Thread(Thread-15984, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 110.51it/s]


current loss: [[inf]]
running thread  <Thread(Thread-15985, initial)>
running thread  <Thread(Thread-15986, initial)>
running thread  <Thread(Thread-15987, initial)>
running thread  <Thread(Thread-15988, initial)>
running thread  <Thread(Thread-15989, initial)>
running thread  <Thread(Thread-15990, initial)>
running thread  <Thread(Thread-15991, initial)>
running thread  <Thread(Thread-15992, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 105.69it/s]


current loss: [[nan]]
running thread  <Thread(Thread-15993, initial)>
running thread  <Thread(Thread-15994, initial)>
running thread  <Thread(Thread-15995, initial)>
running thread  <Thread(Thread-15996, initial)>
running thread  <Thread(Thread-15997, initial)>
running thread  <Thread(Thread-15998, initial)>
running thread  <Thread(Thread-15999, initial)>
running thread  <Thread(Thread-16000, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 117.82it/s]


current loss: [[nan]]
running thread  <Thread(Thread-16001, initial)>
running thread  <Thread(Thread-16002, initial)>
running thread  <Thread(Thread-16003, initial)>
running thread  <Thread(Thread-16004, initial)>
running thread  <Thread(Thread-16005, initial)>
running thread  <Thread(Thread-16006, initial)>
running thread  <Thread(Thread-16007, initial)>
running thread  <Thread(Thread-16008, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 103.70it/s]


current loss: [[4.51229928e+305]]
running thread  <Thread(Thread-16009, initial)>
running thread  <Thread(Thread-16010, initial)>
running thread  <Thread(Thread-16011, initial)>
running thread  <Thread(Thread-16012, initial)>
running thread  <Thread(Thread-16013, initial)>
running thread  <Thread(Thread-16014, initial)>
running thread  <Thread(Thread-16015, initial)>
running thread  <Thread(Thread-16016, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 103.87it/s]


current loss: [[inf]]
running thread  <Thread(Thread-16017, initial)>
running thread  <Thread(Thread-16018, initial)>
running thread  <Thread(Thread-16019, initial)>
running thread  <Thread(Thread-16020, initial)>
running thread  <Thread(Thread-16021, initial)>
running thread  <Thread(Thread-16022, initial)>
running thread  <Thread(Thread-16023, initial)>
running thread  <Thread(Thread-16024, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 94.25it/s]


current loss: [[1.35205636e+192]]
running thread  <Thread(Thread-16025, initial)>
running thread  <Thread(Thread-16026, initial)>
running thread  <Thread(Thread-16027, initial)>
running thread  <Thread(Thread-16028, initial)>
running thread  <Thread(Thread-16029, initial)>
running thread  <Thread(Thread-16030, initial)>
running thread  <Thread(Thread-16031, initial)>
running thread  <Thread(Thread-16032, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 120.77it/s]

current loss: [[6.01253559e+203]]
0.7685527843976404
tensor([0.1612, 0.0375, 0.3463])
tensor([3.2033e+01, 6.0981e+01, 4.7278e+01, 7.5783e+01, 2.8606e+06, 1.0052e+07,
        9.8964e+03, 3.7004e+32, 2.9284e+01,        inf, 2.2172e+01, 1.5604e+27,
        7.6855e-01, 3.3270e+17, 3.5897e+02, 1.1224e+01, 4.0533e+00,        inf,
        4.3999e+24,        inf])
tensor([[ 1.0000e-04,  1.0000e-04,  1.0000e-04,  1.2919e+00, -2.3357e-01,
          9.6428e-01, -2.3842e-01,  8.2893e-02, -6.7754e-01,  2.8579e-01,
          3.8074e-01, -1.4973e+00,  1.6121e-01, -8.9280e-01, -2.0270e+00,
          5.4311e-01,  1.2185e-01, -1.9261e-01, -7.0933e-03, -1.2539e+00],
        [ 1.0000e-03,  1.0000e-03,  1.0000e-03,  5.4977e-02,  1.4342e-01,
         -8.0022e-03,  1.5816e-01, -1.4176e-02,  1.3476e-03, -6.8986e-02,
          9.0205e-02, -6.0348e-02,  3.7487e-02,  1.5619e-01, -5.5493e-03,
          5.3249e-02,  5.3686e-02, -3.0258e-01,  9.8412e-02,  8.1510e-02],
        [ 9.0000e-01,  9.0000e-01,  9.0000e-01,




In [288]:
# the weight matrix generated under the optimal hyperparameter set
W = sgd_mss_with_momentum_threaded(Xs=Xs_tr, Ys=Ys_tr, gamma=0.1612, W0=W0, alpha=0.0375, beta=0.3463, B=8, num_epochs=40, num_threads=8)

running thread  <Thread(Thread-16033, initial)>
running thread  <Thread(Thread-16034, initial)>
running thread  <Thread(Thread-16035, initial)>
running thread  <Thread(Thread-16036, initial)>
running thread  <Thread(Thread-16037, initial)>
running thread  <Thread(Thread-16038, initial)>
running thread  <Thread(Thread-16039, initial)>
running thread  <Thread(Thread-16040, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:00<00:00, 110.06it/s]

current loss: [[41.83394781]]





In [291]:
# use the model to predict the approximate number of the scanned receipts for each day of 2022
q = deque()
warm_up = [Receipt_Count[j] for j in range(364, 364 - day_range, -1)]
for count in warm_up:
    q.append(count)
predictions = []
for _ in range(365):
    X = numpy.array(q, dtype=float).reshape(30,)
    predictions.append(((W @ (X - mean) / std) * std + mean).item())
    q.popleft()
    q.append(predictions[-1])
# calculate the predicted number of the scanned receipts for each month of 2022
df = pandas.DataFrame()
df.index = pandas.date_range(start='1/1/2022', end='12/31/2022')
df["count"] = predictions
monthSum = df.groupby(df.index.month).sum()

In [293]:
df # predicted data

Unnamed: 0,count
2022-01-01,9.846695e+06
2022-01-02,9.976004e+06
2022-01-03,1.002773e+07
2022-01-04,9.902056e+06
2022-01-05,9.906046e+06
...,...
2022-12-27,8.932000e+06
2022-12-28,8.942787e+06
2022-12-29,8.936760e+06
2022-12-30,8.920917e+06


In [294]:
# save the trained linear model
with open('trained linear model.pkl','wb') as f:
    pickle.dump(monthSum, f)