In [1]:
#!/usr/bin/env python3
import os
import sys

# BEGIN THREAD SETTINGS this sets the number of threads used by numpy in the program
# (should be set to 1 to avoid implicit parallelism)
implicit_num_threads = 1
os.environ["OMP_NUM_THREADS"] = str(implicit_num_threads)
os.environ["MKL_NUM_THREADS"] = str(implicit_num_threads)
os.environ["OPENBLAS_NUM_THREADS"] = str(implicit_num_threads)
# END THREAD SETTINGS

import numpy as np
import torch
from numpy import random
import matplotlib
import pickle
matplotlib.use('agg')
from matplotlib import pyplot as plt
import threading
import time
import pandas
from collections import deque

from tqdm import tqdm
from google.colab import files, drive

In [2]:
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [3]:
path = "/content/gdrive/MyDrive/data_daily.csv"
# reading the CSV file
csvFile = pandas.read_csv(path)
 
# displaying the contents of the CSV file
print(csvFile)

         # Date  Receipt_Count
0    2021-01-01        7564766
1    2021-01-02        7455524
2    2021-01-03        7095414
3    2021-01-04        7666163
4    2021-01-05        7771289
..          ...            ...
360  2021-12-27       10350408
361  2021-12-28       10219445
362  2021-12-29       10313337
363  2021-12-30       10310644
364  2021-12-31       10211187

[365 rows x 2 columns]


In [4]:
def train_test_split(Xs, Ys, test_size, random_state):
    random.seed(random_state)
    random.shuffle(Xs)
    random.shuffle(Ys)
    test_set_size = int(test_size * len(Ys))
    Xs_tr, Xs_va, Ys_tr, Ys_va = Xs[test_set_size:], Xs[:test_set_size], Ys[test_set_size:], Ys[:test_set_size]
    return Xs_tr, Xs_va, Ys_tr, Ys_va

In [5]:
# setting the days ahead range for predicting the approximate number of the scanned receipts for a future day
day_range = 30 # using the data from day 0 to day 29 to predict day 30

# constructing features and target variables
Receipt_Count = csvFile["Receipt_Count"].array.to_numpy()
Xs = [[Receipt_Count[j] for j in range(i, i + day_range)] for i in range(335)]
Ys = Receipt_Count[day_range:]
assert len(Xs) == len(Ys)
# perform train-validation (0.8 vs 0.2) split
Xs_tr, Xs_va, Ys_tr, Ys_va = train_test_split(Xs, 
                                              Ys, 
                                              test_size = 0.2, 
                                              random_state = 123)

In [6]:
# normalization of input data
mean = np.mean(Receipt_Count)
std = np.std(Receipt_Count)
Xs_tr = np.array(Xs_tr, dtype=float).reshape(day_range, -1)
Xs_va = np.array(Xs_va, dtype=float).reshape(day_range, -1)
Ys_tr = np.array(Ys_tr, dtype=float).reshape(1, -1)
Ys_va = np.array(Ys_va, dtype=float).reshape(1, -1)
Xs_tr = (Xs_tr - mean) / std
Ys_tr = (Ys_tr - mean) / std
Xs_tr = (Xs_va - mean) / std
Ys_tr = (Ys_va - mean) / std
receipts_dataset = (Xs_tr, Xs_va, Ys_tr, Ys_va)
print(Xs_tr.shape, Xs_va.shape, Ys_tr.shape, Ys_va.shape)

(30, 67) (30, 67) (1, 67) (1, 67)


In [7]:
# weight matrix initialization
h = 5
W10 = np.zeros((h, len(Xs_tr)))
W20 = np.zeros((len(Ys_tr), h))
print(W10.shape, W20.shape)

(5, 30) (1, 5)


In [8]:
### a function to create a unique increasing ID
### note that this is just a quick-and-easy way to create a global order
### it's not the only way to do it
global_order_counter = 0
def get_next_order():
    global global_order_counter
    rv = global_order_counter
    global_order_counter = global_order_counter + 1
    return rv

In [9]:
### a helper function to convert constants into BackproppableArray objects
def to_ba(x):
    if isinstance(x, BackproppableArray):
        return x
    elif isinstance(x, np.ndarray):
        return BackproppableArray(x)
    elif isinstance(x, float):
        return BackproppableArray(np.array(x))
    elif isinstance(x, int):
        return BackproppableArray(np.array(float(x)))
    else:
        raise Exception("could not convert {} to BackproppableArray".format(x))

In [10]:
### a class for an array that can be "packpropped-through"
class BackproppableArray(object):
    # np_array     numpy array that stores the data for this object
    def __init__(self, np_array, dependencies=[]):
        super().__init__()
        self.data = np_array

        # grad holds the gradient, an array of the same shape as data
        # before backprop, grad is None
        # during backprop before grad_fn is called, grad holds the partially accumulated gradient
        # after backprop, grad holds the gradient of the loss (the thing we call backward on)
        #     with respect to this array
        # if you want to use the same array object to call backward twice, you need to re-initialize
        #     grad to zero first
        self.grad = np.zeros(np_array.shape, dtype="float64")

        # an counter that increments monotonically over the course of the application
        # we know that arrays with higher order must depend only on arrays with lower order
        # we can use this to order the arrays for backpropagation
        self.order = get_next_order()

        # a list of other BackproppableArray objects on which this array directly depends
        # we'll use this later to decide which BackproppableArray objects need to participate in the backward pass
        self.dependencies = dependencies

    # represents me as a string
    def __repr__(self):
        return "({}, type={})".format(self.data, type(self).__name__)

    # returns a list containing this array and ALL the dependencies of this array, not just
    #    the direct dependencies listed in self.dependencies
    # that is, this list should include this array, the arrays in self.dependencies,
    #     plus all the arrays those arrays depend on, plus all the arrays THOSE arrays depend on, et cetera
    # the returned list must only include each dependency ONCE
    def all_dependencies(self):
        # TODO: (1.1) implement some sort of search to get all the dependencies
        res = [self]
        if not self.dependencies: return res
        q = deque()
        for i in range(len(self.dependencies)):
            q.append(self.dependencies[i])
        while q:
            for _ in range(len(q)):
                node = q.popleft()
                if node not in res:
                    node.grad = np.zeros(node.data.shape, dtype="float64")
                    res.append(node)
                for j in range(len(node.dependencies)):
                    q.append(node.dependencies[j])
        return res
            

    # compute gradients of this array with respect to everything it depends on
    def backward(self):
        # can only take the gradient of a scalar
        #assert(self.data.size == 1)

        # depth-first search to find all dependencies of this array
        all_my_dependencies = self.all_dependencies()
        # TODO: (1.2) implement the backward pass to compute the gradients
        #   this should do the following
        #   (1) sort the found dependencies so that the ones computed last go FIRST
        all_my_dependencies = sorted(all_my_dependencies, key = lambda x: x.order, reverse = True)
        #   (2) initialize and zero out all the gradient accumulators (.grad) for all the dependencies
        for dependency in all_my_dependencies:
            if len(dependency.data.shape)==0:
                dependency.grad = np.array(0.0)
            else:
                dependency = np.zeros(dependency.data.shape, dtype="float64")
        #   (3) set the gradient accumulator of this array to 1, as an initial condition
        #           since the gradient of a number with respect to itself is 1
        if len(self.data.shape)==0:
            self.grad = np.array(1.0)
        else:
            self.grad = np.ones(self.data.shape, dtype="float64")
        #   (4) call the backward function for all the dependencies in the sorted reverse order
        for dependency in all_my_dependencies:
            dependency.grad_fn()

    # function that is called to process a single step of backprop for this array
    # when called, it must be the case that self.grad contains the gradient of the loss (the
    #     thing we are differentating) with respect to this array
    # this function should update the .grad field of its dependencies
    #
    # this should just say "pass" for the parent class
    #
    # child classes override this
    def grad_fn(self):
        pass

    # operator overloading
    def __add__(self, other):
        return BA_Add(self, to_ba(other))
    def __sub__(self, other):
        return BA_Sub(self, to_ba(other))
    def __mul__(self, other):
        return BA_Mul(self, to_ba(other))
    def __truediv__(self, other):
        return BA_Div(self, to_ba(other))

    def __radd__(self, other):
        return BA_Add(to_ba(other), self)
    def __rsub__(self, other):
        return BA_Sub(to_ba(other), self)
    def __rmul__(self, other):
        return BA_Mul(to_ba(other), self)
    def __rtruediv__(self, other):
        return BA_Div(to_ba(other), self)

    # TODO (2.2) Add operator overloading for matrix multiplication
    def __matmul__(self, other):
        return BA_MatMul(self, to_ba(other))
    def __rmatmul__(self, other):
        return BA_MatMul(to_ba(other), self)
    
    def sum(self, axis=None, keepdims=True):
        return BA_Sum(self, axis)

    def reshape(self, shape):
        return BA_Reshape(self, shape)

    def transpose(self, axes = None):
        if axes is None:
            axes = range(self.data.ndim)[::-1]
        return BA_Transpose(self, axes)

In [11]:
# a class for an array that's the result of an addition operation
class BA_Add(BackproppableArray):
    # x + y
    def __init__(self, x, y):
        super().__init__(x.data + y.data, [x,y])
        self.x = x
        self.y = y

    def grad_fn(self):
        # TODO: (2.3) improve grad fn for Add
        if len(self.x.data.shape)==0:
            self.x.grad = self.x.grad+self.grad
        else:
        
            if len(self.x.data) != 1:
                self.x.grad = self.x.grad+self.grad
            else:
                self.x.grad = self.x.grad+np.sum(self.grad)
        if len(self.y.data.shape)==0:
            self.y.grad = self.y.grad+self.grad
        else:
            if len(self.y.data) != 1:
                self.y.grad = self.y.grad+self.grad
            else:
                self.y.grad = self.y.grad+np.sum(self.grad)

In [12]:
# a class for an array that's the result of a subtraction operation
class BA_Sub(BackproppableArray):
    # x + y
    def __init__(self, x, y):
        super().__init__(x.data - y.data, [x,y])
        self.x = x
        self.y = y

    def grad_fn(self):
        # TODO: (1.3, 2.3) implement grad fn for Sub
        if len(self.x.data.shape)==0:
            self.x.grad = self.x.grad+self.grad
        else:
        
            if len(self.x.data) != 1:
                self.x.grad = self.x.grad++self.grad
            else:
                self.x.grad = self.x.grad++np.sum(self.grad)
        if len(self.y.data.shape)==0:
            self.y.grad = self.y.grad-self.grad
        else:
            if len(self.y.data) != 1:
                self.y.grad = self.y.grad+-self.grad
            else:
                self.y.grad = self.y.grad-np.sum(self.grad)

In [13]:
# a class for an array that's the result of a multiplication operation
class BA_Mul(BackproppableArray):
    # x * y
    def __init__(self, x, y):
        super().__init__(x.data * y.data, [x,y])
        self.x = x
        self.y = y

    def grad_fn(self):
        # TODO: (1.3, 2.3) implement grad fn for Mul
        if len(self.x.data.shape)==0:
            self.x.grad = self.x.grad+self.y.data*self.grad
        else:
            if len(self.x.data) != 1:
                self.x.grad = self.x.grad+self.y.data*self.grad
            else:
                self.x.grad = self.x.grad+np.sum(self.grad*self.y.data)
        if len(self.y.data.shape)==0:
            self.y.grad = self.y.grad+self.x.data*self.grad
        else:
            if len(self.y.data) != 1:
                self.y.grad = self.y.grad+self.x.data*self.grad
            else:
                self.y.grad = self.y.grad+np.sum(self.grad*self.x.data)

In [14]:
# a class for an array that's the result of a division operation
class BA_Div(BackproppableArray):
    # x / y
    def __init__(self, x, y):
        super().__init__(x.data / y.data, [x,y])
        self.x = x
        self.y = y

    def grad_fn(self):
        # TODO: (1.3, 2.3) implement grad fn for Div
        # self.x.grad += (1.0 / self.y.data)*self.grad
        # self.y.grad -= (self.x.data* self.grad)/(self.y.data**2)
        
        if len(self.x.data.shape)==0:
            self.x.grad = self.x.grad+(1.0 / self.y.data)*self.grad
        else:
            if len(self.x.data) != 1:
                self.x.grad = self.x.grad+(1.0 / self.y.data)*self.grad
            else:
                self.x.grad = self.x.grad+np.sum(self.grad/self.y.data)
        if len(self.y.data.shape)==0:
            self.y.grad = self.y.grad-(self.x.data* self.grad)/(self.y.data**2)
        else:
            if len(self.y.data) != 1:
                self.y.grad = self.y.grad-(self.x.data* self.grad)/(self.y.data**2)
            else:
                self.y.grad = self.y.grad-np.sum(self.grad*self.x.data/self.y.data**2)

In [15]:
# a class for an array that's the result of a matrix multiplication operation
class BA_MatMul(BackproppableArray):
    # x @ y
    def __init__(self, x, y):
        # we only support multiplication of matrices, i.e. arrays with shape of length 2
        assert(len(x.data.shape) == 2)
        assert(len(y.data.shape) == 2)
        super().__init__(x.data @ y.data, [x,y])
        self.x = x
        self.y = y

    def grad_fn(self):
        # TODO: (2.1) implement grad fn for MatMul
        self.x.grad = self.x.grad+self.grad@self.y.data.T
        self.y.grad = self.y.grad+self.x.data.T@self.grad

In [16]:
# a class for an array that's the result of an exponential operation
class BA_Exp(BackproppableArray):
    # exp(x)
    def __init__(self, x):
        super().__init__(np.exp(x.data), [x])
        self.x = x

    def grad_fn(self):
        # TODO: (1.3) implement grad fn for Exp
        self.x.grad = self.x.grad+np.exp(self.x.data)*self.grad

In [17]:
def exp(x):
    if isinstance(x, BackproppableArray):
        return BA_Exp(x)
    else:
        return np.exp(x)

In [18]:
# a class for an array that's the result of an logarithm operation
class BA_Log(BackproppableArray):
    # log(x)
    def __init__(self, x):
        super().__init__(np.log(x.data), [x])
        self.x = x

    def grad_fn(self):
        # TODO: (1.3) implement grad fn for Log
        self.x.grad = self.x.grad+(1.0 / self.x.data)*self.grad

In [19]:
def log(x):
    if isinstance(x, BackproppableArray):
        return BA_Log(x)
    else:
        return np.log(x)

In [20]:
# a class for an array that's the result of a sum operation
class BA_Sum(BackproppableArray):
    # x.sum(axis, keepdims=True)
    def __init__(self, x, axis):
        super().__init__(x.data.sum(axis, keepdims=True), [x])
        self.x = x
        self.axis = axis

    def grad_fn(self):
        # TODO: (2.1) implement grad fn for Sum
        if len(self.x.data.shape)==0:
            self.x.grad = self.x.grad+self.grad
        else:
            self.x.grad = self.x.grad+self.grad*np.ones(self.x.data.shape)

In [21]:
# a class for an array that's the result of a reshape operation
class BA_Reshape(BackproppableArray):
    # x.reshape(shape)
    def __init__(self, x, shape):
        super().__init__(x.data.reshape(shape), [x])
        self.x = x
        self.shape = shape

    def grad_fn(self):
        # TODO: (2.1) implement grad fn for Reshape
        if type(self.grad)!=float:
               self.x.grad = self.x.grad+self.grad.reshape(self.x.data.shape)

In [22]:
# a class for an array that's the result of a transpose operation
class BA_Transpose(BackproppableArray):
    # x.transpose(axes)
    def __init__(self, x, axes):
        super().__init__(x.data.transpose(axes), [x])
        self.x = x
        self.axes = axes

    def grad_fn(self):
        # TODO: (2.1) implement grad fn for Transpose
        if type(self.grad)!=float:
            self.x.grad = self.x.grad+np.transpose(self.grad, axes = self.axes)

In [23]:
class BA_Sigmoid(BackproppableArray):
    def __init__(self, x):
        super().__init__(1 / (1 + np.exp(-x.data)), [x])
        self.x = x
    def grad_fn(self):
        self.x.grad = self.x.grad + 1 / (1 + np.exp(-self.x.data)) * (1 - 1 / (1 + np.exp(-self.x.data))) * self.grad

In [24]:
def sigmoid(x):
    if isinstance(x, BackproppableArray):
        return BA_Sigmoid(x)
    else:
        return 1 / (1 + np.exp(-x))

In [25]:
# automatic derivative of scalar function f at x, using backprop
def backprop_diff(f, x):
    ba_x = to_ba(x)
    fx = f(ba_x)
    fx.backward()
    return ba_x.grad

In [32]:
# SGD + Momentum (threaded) for Neural Network
#
# Xs              training examples (d * n)
# Ys              training labels   (c * n)
# gamma           L2 regularization constant
# W0              the initial value of the parameters (c * d)
# alpha           step size/learning rate
# beta            momentum hyperparameter
# B               minibatch size
# num_epochs      number of epochs (passes through the training set) to run
# monitor_period  how frequently, in terms of batches (not epochs) to output the parameter vector
# num_threads     how many threads to use
#
# returns         the final model arrived at at the end of training
def nn_sgd_mss_with_momentum_threaded(Xs, Ys, gamma, W10, W20, alpha, beta, B, num_epochs, num_threads):
    (d, n) = Xs.shape
    (h, d) = W10.shape
    # perform global setup/initialization/allocation
    V1 = np.zeros(W10.shape)
    V2 = np.zeros(W20.shape)
    W1 = np.copy(W10)
    W2 = np.copy(W20)
    gradient1 = np.zeros(W10.shape)
    gradient2 = np.zeros(W20.shape)
    Bt = int(B / num_threads)

    # construct the barrier object
    iter_barrier = threading.Barrier(num_threads + 1)

    # a function for each thread to run
    def thread_main(ithread):
        # perform any per-thread allocations
        # avoid memory allocation in the running of program by pre-allocating memories ahead
        thread_gradient1 = np.zeros(W10.shape)
        thread_gradient2 = np.zeros(W20.shape)

        slices_X = []
        slices_Y = []
        for ibatch in range(int(n/B)):
            ii = range(ibatch*B + ithread*Bt, ibatch*B + (ithread+1)*Bt)
            slices_X.append(np.ascontiguousarray(Xs[:,ii]))
            slices_Y.append(np.ascontiguousarray(Ys[:,ii]))
        # gradint calculation (uses only pre-allocated memories to improve performance of SGD)
        for it in range(num_epochs):
            for ibatch in range(int(n/B)):
                # work done by thread in each iteration;
                # this section of code primarily uses numpy operations with the "out=" argument specified
                np.multiply(thread_gradient1, 0, out=thread_gradient1)
                np.add(thread_gradient1,\
                       backprop_diff(lambda W: neural_network_loss_1(slices_X[ibatch], slices_Y[ibatch], W, W2), W1), out=thread_gradient1)
                np.multiply(thread_gradient2, 0, out=thread_gradient2)
                np.add(thread_gradient2,\
                       backprop_diff(lambda W: neural_network_loss_2(slices_X[ibatch], slices_Y[ibatch], W1, W), W2), out=thread_gradient2)
                
                iter_barrier.wait() # wait for all threads to finish computation before moving up to next step
                np.add(gradient1, thread_gradient1, out=gradient1)
                np.add(gradient2, thread_gradient2, out=gradient2)
                
                iter_barrier.wait()

    worker_threads = [threading.Thread(target=thread_main, args=(it,)) for it in range(num_threads)]

    for t in worker_threads:
        print("running thread ", t)
        t.start()

    print("Running minibatch sequential-scan SGD with momentum (%d threads)" % num_threads)
    # gradient & momentum update:
    # v <- beta * v - alpha * gradient
    # w <- w + v
    for it in tqdm(range(num_epochs)):
        for ibatch in range(int(n/B)):
            np.multiply(gradient1, 0, out=gradient1)
            np.multiply(gradient2, 0, out=gradient2)
            iter_barrier.wait()
            # work done on a single thread at each iteration;
            # this section of code primarily uses numpy operations with the "out=" argument specified
            np.divide(gradient1, B, out=gradient1)
            np.multiply(beta, V1, out=V1)
            np.multiply(alpha, gradient1, out=gradient1)
            np.subtract(V1, gradient1, out=V1)
            np.add(W1, V1, out=W1)
            np.divide(gradient2, B, out=gradient2)
            np.multiply(beta, V2, out=V2)
            np.multiply(alpha, gradient2, out=gradient2)
            np.subtract(V2, gradient2, out=V2)
            np.add(W2, V2, out=W2)
            iter_barrier.wait()

    for t in worker_threads:
        t.join()

    print("current loss: " + str((W2 @ sigmoid(W1 @ Xs) - Ys) @ (W2 @ sigmoid(W1 @ Xs) - Ys).transpose())) # report current loss
    # return the learned model
    return W1, W2

In [33]:
# customized hyperparameter tryout
nn_sgd_mss_with_momentum_threaded(Xs=Xs_tr, Ys=Ys_tr, gamma=0.0001, W10=W10, W20=W20, alpha=0.001, beta=0.9, B=8, num_epochs=200, num_threads=8)

running thread  <Thread(Thread-11, initial)>
running thread  <Thread(Thread-12, initial)>
running thread  <Thread(Thread-13, initial)>
running thread  <Thread(Thread-14, initial)>
running thread  <Thread(Thread-15, initial)>
running thread  <Thread(Thread-16, initial)>
running thread  <Thread(Thread-17, initial)>
running thread  <Thread(Thread-18, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 200/200 [00:18<00:00, 10.98it/s]

current loss: [[49.55791879]]





(array([[-2.55844122e-03, -3.35173051e-03, -5.30038179e-03,
          2.25961259e-03,  5.54046327e-03,  7.06139460e-05,
         -1.27880822e-05,  6.14877023e-03, -7.04510279e-03,
         -5.11654413e-03, -4.61385844e-03,  9.98700775e-04,
          1.34734094e-03,  2.08066971e-05, -9.06537543e-05,
          4.36028589e-03, -4.43149891e-03, -1.09513450e-02,
         -3.75449972e-03,  4.73086053e-03,  5.15074252e-03,
          3.98984900e-03, -6.91025082e-03,  8.58127200e-03,
         -4.58574768e-03,  3.90778388e-03, -1.04859179e-03,
          6.61668308e-04,  8.97947829e-03, -4.67860270e-03],
        [-2.55844122e-03, -3.35173051e-03, -5.30038179e-03,
          2.25961259e-03,  5.54046327e-03,  7.06139460e-05,
         -1.27880822e-05,  6.14877023e-03, -7.04510279e-03,
         -5.11654413e-03, -4.61385844e-03,  9.98700775e-04,
          1.34734094e-03,  2.08066971e-05, -9.06537543e-05,
          4.36028589e-03, -4.43149891e-03, -1.09513450e-02,
         -3.75449972e-03,  4.73086053e-

In [34]:
# gradient descent to do the inner optimization step of Bayesian optimization
#
# objective     the objective function to minimize, as a function that takes a torch tensor and returns an expression
# x0            initial value to assign to variable (torch tensor)
# alpha         learning rate/step size
# num_iters     number of iterations of gradient descent
#
# returns     (obj_min, x_min), where
#       obj_min     the value of the objective after running iterations of gradient descent
#       x_min       the value of x after running iterations of gradient descent
def gradient_descent(objective, x0, alpha, num_iters):
    x = x0.detach().clone()  # create a fresh copy of x0
    x.requires_grad = True   # make it a target for differentiation
    opt = torch.optim.SGD([x], alpha)
    for it in range(num_iters):
        opt.zero_grad()
        f = objective(x)
        f.backward()
        opt.step()
    x.requires_grad = False  # make x no longer require gradients
    return (float(f.item()), x)

In [35]:
# compute the Gaussian RBF kernel matrix for a vector of data points (in PyTorch)
#
# Xs        points at which to compute the kernel (size: d x m)
# Zs        other points at which to compute the kernel (size: d x n)
# gamma     gamma parameter for the RBF kernel
#
# returns   an (m x n) matrix Sigma where Sigma[i,j] = K(Xs[:,i], Zs[:,j])
def rbf_kernel_matrix(Xs, Zs, gamma):
    m = Xs.shape[1] if len(Xs.shape) > 1 else 1
    n = Zs.shape[1] if len(Zs.shape) > 1 else 1
    sigma = [[torch.exp(-gamma * torch.linalg.norm(Xs[:, i] - Zs[:, j])**2) for j in range(n)] for i in range(m)]
    sigma = torch.tensor(sigma)
    return sigma

In [36]:
# compute the distribution predicted by a Gaussian process that uses an RBF kernel (in PyTorch)
#
# Xs            points at which to compute the kernel (size: d x n) where d is the number of parameters
# Ys            observed value at those points (size: n)
# gamma         gamma parameter for the RBF kernel
# sigma2_noise  the variance sigma^2 of the additive gaussian noise used in the model
#
# returns   a function that takes a value Xtest (size: d) and returns a tuple (mean, variance)
def gp_prediction(Xs, Ys, gamma, sigma2_noise):
    # first, do any work that can be shared among predictions
    sigma = rbf_kernel_matrix(Xs, Xs, gamma)
    n = Xs.shape[1]
    # next, define a nested function to return
    def prediction_mean_and_variance(Xtest):
        # construct mean and variance
        k = [torch.exp(-gamma * torch.linalg.norm(Xs[:, i] - Xtest)) for i in range(n)]
        k = torch.tensor(k)
        
        mean = k @ torch.linalg.inv(sigma + sigma2_noise * torch.eye(n)) @ Ys
        variance = torch.exp(-gamma * torch.linalg.norm(Xtest - Xtest)) + sigma2_noise -\
        k @ torch.linalg.inv(sigma + sigma2_noise * torch.eye(n)) @ k.T
        return (mean.reshape(()), variance.reshape(()))
    #finally, return the nested function
    return prediction_mean_and_variance

In [37]:
# run Bayesian optimization to minimize an objective
#
# objective     objective function; takes a torch tensor, returns a python float scalar
# d             dimension to optimize over
# gamma         gamma to use for RBF hyper-hyperparameter
# sigma2_noise  additive Gaussian noise parameter for Gaussian Process
# acquisition   acquisition function to use (e.g. ei_acquisition)
# random_x      function that returns a random sample of the parameter we're optimizing over (a torch tensor, e.g. for use in warmup)
# gd_nruns      number of random initializations we should use for gradient descent for the inner optimization step
# gd_alpha      learning rate for gradient descent
# gd_niters     number of iterations for gradient descent
# n_warmup      number of initial warmup evaluations of the objective to use
# num_iters     number of outer iterations of Bayes optimization to run (including warmup)
#
# returns       tuple of (y_best, x_best, Ys, Xs), where
#   y_best          objective value of best point found
#   x_best          best point found
#   Ys              vector of objective values for all points searched (size: num_iters)
#   Xs              matrix of all points searched (size: d x num_iters)
def bayes_opt(objective, d, gamma, sigma2_noise, acquisition, random_x, gd_nruns, gd_alpha, gd_niters, n_warmup, num_iters):
    y_best = float("inf")
    x_best = torch.zeros(size=(d,))
    Xs = []
    Ys = []
    # warm-up to prepare prior information for Bayesian Optimization
    for _ in range(n_warmup):
        x_i = torch.tensor([0.0001, 0.001, 0.9])
        y_i = objective(x_i)
        Xs.append(x_i)
        Ys.append(y_i)
        if y_i <= y_best:
            y_best = y_i
            x_best = x_i
    for _ in range(n_warmup, num_iters):
        Xs_vec = torch.stack(tensors=Xs, dim=1)
        Ys_vec = torch.tensor(Ys)
        prediction_fn =  gp_prediction(Xs_vec, Ys_vec, gamma, sigma2_noise)
        y = float("inf")
        x = torch.zeros(size=(d,))
        for _ in range(gd_nruns):
            x_0 = random_x(size=(d,))
            _, x_i = gradient_descent(objective=lambda x: acquisition(y_best, prediction_fn(x)[0], torch.sqrt(prediction_fn(x)[1])),\
                x0=x_0, alpha=gd_alpha, num_iters=gd_niters)
            y_i = objective(x_i)
            if y_i <= y:
                y = y_i
                x = x_i
        Xs.append(x)
        Ys.append(y)
        if y <= y_best:
            y_best = y
            x_best = x
    Xs_vec = torch.stack(tensors=Xs, dim=1)
    Ys_vec = torch.tensor(Ys)
    return y_best, x_best, Ys_vec, Xs_vec

In [38]:
# return a function that computes the lower confidence bound (LCB) acquisition function
#
# kappa     parameter for LCB
#
# returns   function that computes the LCB acquisition function
def lcb_acquisition(kappa):
    def A_lcb(Ybest, mean, stdev):
        return mean - kappa * stdev
    return A_lcb

In [41]:
# produce a function that runs SGD+Momentum on the receipts dataset, initializing the weights to zero
#
# mnist_dataset         the MNIST dataset, as returned by load_MNIST_dataset_with_validation_split
# num_epochs            number of epochs to run for
# B                     the batch size
#
# returns               a function that takes parameters
#   params                  a numpy vector of shape (3,) with entries that determine the hyperparameters, where
#       gamma = params[0]
#       alpha = params[1]
#       beta = params[2]
#                       and returns (the validation error of the final trained model after all the epochs) minus 0.9.
#                       if training diverged (i.e. any of the weights are non-finite) then return 0.1, which corresponds to an error of 1.
def receipts_dataset_sgd_mss_with_momentum(receipts_dataset, B, num_epochs, num_threads):
    def objective(params):
        Xs_tr, Xs_va, Ys_tr, Ys_va = receipts_dataset
        d = Xs_tr.shape[0]
        c = Ys_tr.shape[0]
        if torch.is_tensor(Xs_tr):
            Xs_tr = Xs_tr.numpy()
        if torch.is_tensor(Ys_tr):
            Ys_tr = Ys_tr.numpy()
        if torch.is_tensor(Xs_va):
            Xs_va = Xs_va.numpy()
        if torch.is_tensor(Ys_va):
            Ys_va = Ys_va.numpy()
        gamma, alpha, beta, W_0 = float(params[0].item()), float(params[1].item()), float(params[2].item()), np.zeros(shape=(c,d))
        W1, W2 = nn_sgd_mss_with_momentum_threaded(Xs=Xs_tr, Ys=Ys_tr, gamma=gamma, W10=W10, W20=W20, alpha=alpha, beta=beta,\
                                           B=B, num_epochs=num_epochs, num_threads=num_threads)
        Ys_pr = W2 @ sigmoid(W1 @ Xs_va)
        error = (W2 @ sigmoid(W1 @ Xs_va) - Ys_va) @ (W2 @ sigmoid(W1 @ Xs_va) - Ys_va).transpose() # use 1 - R^2 as error to select hyperparameters
        return float(error)
    return objective

In [42]:
# perform Bayesian Optimization to find optimal hyperparameters
obj = receipts_dataset_sgd_mss_with_momentum(receipts_dataset, B=8, num_epochs=40, num_threads=8)
(y_best, x_best, Ys_vec, Xs_vec) = bayes_opt(objective=obj, d=3, gamma=10, sigma2_noise=0.001, acquisition=lcb_acquisition(kappa=2.0),\
                                     random_x=torch.randn, gd_nruns=20, gd_alpha=0.01, gd_niters=20, n_warmup=3, num_iters=20)
print(y_best) # best R^2 score
print(x_best) # best hyperparameter set
print(Ys_vec) # R^2 score history
print(Xs_vec) # hyperparameter set history

running thread  <Thread(Thread-28, initial)>
running thread  <Thread(Thread-29, initial)>
running thread  <Thread(Thread-30, initial)>
running thread  <Thread(Thread-31, initial)>
running thread  <Thread(Thread-32, initial)>
running thread  <Thread(Thread-33, initial)>
running thread  <Thread(Thread-34, initial)>
running thread  <Thread(Thread-35, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.35it/s]
  return 1 / (1 + np.exp(-x))


current loss: [[49.4804125]]
running thread  <Thread(Thread-36, initial)>
running thread  <Thread(Thread-37, initial)>
running thread  <Thread(Thread-38, initial)>
running thread  <Thread(Thread-39, initial)>
running thread  <Thread(Thread-40, initial)>
running thread  <Thread(Thread-41, initial)>
running thread  <Thread(Thread-42, initial)>
running thread  <Thread(Thread-43, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.79it/s]


current loss: [[49.43518533]]
running thread  <Thread(Thread-44, initial)>
running thread  <Thread(Thread-45, initial)>
running thread  <Thread(Thread-46, initial)>
running thread  <Thread(Thread-47, initial)>
running thread  <Thread(Thread-48, initial)>
running thread  <Thread(Thread-49, initial)>
running thread  <Thread(Thread-50, initial)>
running thread  <Thread(Thread-51, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.91it/s]
  k @ torch.linalg.inv(sigma + sigma2_noise * torch.eye(n)) @ k.T


current loss: [[49.47873172]]
running thread  <Thread(Thread-52, initial)>
running thread  <Thread(Thread-53, initial)>
running thread  <Thread(Thread-54, initial)>
running thread  <Thread(Thread-55, initial)>
running thread  <Thread(Thread-56, initial)>
running thread  <Thread(Thread-57, initial)>
running thread  <Thread(Thread-58, initial)>
running thread  <Thread(Thread-59, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


  super().__init__(1 / (1 + np.exp(-x.data)), [x])
  self.x.grad = self.x.grad + 1 / (1 + np.exp(-self.x.data)) * (1 - 1 / (1 + np.exp(-self.x.data))) * self.grad
100%|██████████| 40/40 [00:05<00:00,  7.94it/s]


current loss: [[1.22121177e+250]]
running thread  <Thread(Thread-60, initial)>
running thread  <Thread(Thread-61, initial)>
running thread  <Thread(Thread-62, initial)>
running thread  <Thread(Thread-63, initial)>
running thread  <Thread(Thread-64, initial)>
running thread  <Thread(Thread-65, initial)>
running thread  <Thread(Thread-66, initial)>
running thread  <Thread(Thread-67, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 11.08it/s]


current loss: [[2.99495633e+92]]
running thread  <Thread(Thread-68, initial)>
running thread  <Thread(Thread-69, initial)>
running thread  <Thread(Thread-70, initial)>
running thread  <Thread(Thread-71, initial)>
running thread  <Thread(Thread-72, initial)>
running thread  <Thread(Thread-73, initial)>
running thread  <Thread(Thread-74, initial)>
running thread  <Thread(Thread-75, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.26it/s]


current loss: [[1.41136553e+98]]
running thread  <Thread(Thread-76, initial)>
running thread  <Thread(Thread-77, initial)>
running thread  <Thread(Thread-78, initial)>
running thread  <Thread(Thread-79, initial)>
running thread  <Thread(Thread-80, initial)>
running thread  <Thread(Thread-81, initial)>
running thread  <Thread(Thread-82, initial)>
running thread  <Thread(Thread-83, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.43it/s]


current loss: [[5.09202893e+89]]
running thread  <Thread(Thread-84, initial)>
running thread  <Thread(Thread-85, initial)>
running thread  <Thread(Thread-86, initial)>
running thread  <Thread(Thread-87, initial)>
running thread  <Thread(Thread-88, initial)>
running thread  <Thread(Thread-89, initial)>
running thread  <Thread(Thread-90, initial)>
running thread  <Thread(Thread-91, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.39it/s]


current loss: [[5.25733705e+156]]
running thread  <Thread(Thread-92, initial)>
running thread  <Thread(Thread-93, initial)>
running thread  <Thread(Thread-94, initial)>
running thread  <Thread(Thread-95, initial)>
running thread  <Thread(Thread-96, initial)>
running thread  <Thread(Thread-97, initial)>
running thread  <Thread(Thread-98, initial)>
running thread  <Thread(Thread-99, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.58it/s]


current loss: [[44.88976852]]
running thread  <Thread(Thread-100, initial)>
running thread  <Thread(Thread-101, initial)>
running thread  <Thread(Thread-102, initial)>
running thread  <Thread(Thread-103, initial)>
running thread  <Thread(Thread-104, initial)>
running thread  <Thread(Thread-105, initial)>
running thread  <Thread(Thread-106, initial)>
running thread  <Thread(Thread-107, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.86it/s]


current loss: [[47.20634691]]
running thread  <Thread(Thread-108, initial)>
running thread  <Thread(Thread-109, initial)>
running thread  <Thread(Thread-110, initial)>
running thread  <Thread(Thread-111, initial)>
running thread  <Thread(Thread-112, initial)>
running thread  <Thread(Thread-113, initial)>
running thread  <Thread(Thread-114, initial)>
running thread  <Thread(Thread-115, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


  super().__init__(x.data @ y.data, [x,y])
  self.x.grad = self.x.grad+self.grad@self.y.data.T
  self.x.grad = self.x.grad + 1 / (1 + np.exp(-self.x.data)) * (1 - 1 / (1 + np.exp(-self.x.data))) * self.grad
  self.y.grad = self.y.grad+self.x.data.T@self.grad
100%|██████████| 40/40 [00:02<00:00, 13.41it/s]


current loss: [[nan]]
running thread  <Thread(Thread-116, initial)>
running thread  <Thread(Thread-117, initial)>
running thread  <Thread(Thread-118, initial)>
running thread  <Thread(Thread-119, initial)>
running thread  <Thread(Thread-120, initial)>
running thread  <Thread(Thread-121, initial)>
running thread  <Thread(Thread-122, initial)>
running thread  <Thread(Thread-123, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.57it/s]


current loss: [[2.91782555e+218]]
running thread  <Thread(Thread-124, initial)>
running thread  <Thread(Thread-125, initial)>
running thread  <Thread(Thread-126, initial)>
running thread  <Thread(Thread-127, initial)>
running thread  <Thread(Thread-128, initial)>
running thread  <Thread(Thread-129, initial)>
running thread  <Thread(Thread-130, initial)>
running thread  <Thread(Thread-131, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.64it/s]


current loss: [[1.04079491e+149]]
running thread  <Thread(Thread-132, initial)>
running thread  <Thread(Thread-133, initial)>
running thread  <Thread(Thread-134, initial)>
running thread  <Thread(Thread-135, initial)>
running thread  <Thread(Thread-136, initial)>
running thread  <Thread(Thread-137, initial)>
running thread  <Thread(Thread-138, initial)>
running thread  <Thread(Thread-139, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.26it/s]


current loss: [[2.19124077e+221]]
running thread  <Thread(Thread-140, initial)>
running thread  <Thread(Thread-141, initial)>
running thread  <Thread(Thread-142, initial)>
running thread  <Thread(Thread-143, initial)>
running thread  <Thread(Thread-144, initial)>
running thread  <Thread(Thread-145, initial)>
running thread  <Thread(Thread-146, initial)>
running thread  <Thread(Thread-147, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.63it/s]


current loss: [[1.04496984e+279]]
running thread  <Thread(Thread-148, initial)>
running thread  <Thread(Thread-149, initial)>
running thread  <Thread(Thread-150, initial)>
running thread  <Thread(Thread-151, initial)>
running thread  <Thread(Thread-152, initial)>
running thread  <Thread(Thread-153, initial)>
running thread  <Thread(Thread-154, initial)>
running thread  <Thread(Thread-155, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.34it/s]


current loss: [[1.13521238e+191]]
running thread  <Thread(Thread-156, initial)>
running thread  <Thread(Thread-157, initial)>
running thread  <Thread(Thread-158, initial)>
running thread  <Thread(Thread-159, initial)>
running thread  <Thread(Thread-160, initial)>
running thread  <Thread(Thread-161, initial)>
running thread  <Thread(Thread-162, initial)>
running thread  <Thread(Thread-163, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.46it/s]


current loss: [[53.37752427]]
running thread  <Thread(Thread-164, initial)>
running thread  <Thread(Thread-165, initial)>
running thread  <Thread(Thread-166, initial)>
running thread  <Thread(Thread-167, initial)>
running thread  <Thread(Thread-168, initial)>
running thread  <Thread(Thread-169, initial)>
running thread  <Thread(Thread-170, initial)>
running thread  <Thread(Thread-171, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.49it/s]


current loss: [[2.592149e+125]]
running thread  <Thread(Thread-172, initial)>
running thread  <Thread(Thread-173, initial)>
running thread  <Thread(Thread-174, initial)>
running thread  <Thread(Thread-175, initial)>
running thread  <Thread(Thread-176, initial)>
running thread  <Thread(Thread-177, initial)>
running thread  <Thread(Thread-178, initial)>
running thread  <Thread(Thread-179, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.54it/s]


current loss: [[4.81699478e+282]]
running thread  <Thread(Thread-180, initial)>
running thread  <Thread(Thread-181, initial)>
running thread  <Thread(Thread-182, initial)>
running thread  <Thread(Thread-183, initial)>
running thread  <Thread(Thread-184, initial)>
running thread  <Thread(Thread-185, initial)>
running thread  <Thread(Thread-186, initial)>
running thread  <Thread(Thread-187, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.90it/s]


current loss: [[3.14865363e+206]]
running thread  <Thread(Thread-188, initial)>
running thread  <Thread(Thread-189, initial)>
running thread  <Thread(Thread-190, initial)>
running thread  <Thread(Thread-191, initial)>
running thread  <Thread(Thread-192, initial)>
running thread  <Thread(Thread-193, initial)>
running thread  <Thread(Thread-194, initial)>
running thread  <Thread(Thread-195, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.57it/s]


current loss: [[nan]]
running thread  <Thread(Thread-196, initial)>
running thread  <Thread(Thread-197, initial)>
running thread  <Thread(Thread-198, initial)>
running thread  <Thread(Thread-199, initial)>
running thread  <Thread(Thread-200, initial)>
running thread  <Thread(Thread-201, initial)>
running thread  <Thread(Thread-202, initial)>
running thread  <Thread(Thread-203, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.46it/s]


current loss: [[1.39639364e+270]]
running thread  <Thread(Thread-204, initial)>
running thread  <Thread(Thread-205, initial)>
running thread  <Thread(Thread-206, initial)>
running thread  <Thread(Thread-207, initial)>
running thread  <Thread(Thread-208, initial)>
running thread  <Thread(Thread-209, initial)>
running thread  <Thread(Thread-210, initial)>
running thread  <Thread(Thread-211, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 15.54it/s]


current loss: [[45.83264433]]
running thread  <Thread(Thread-212, initial)>
running thread  <Thread(Thread-213, initial)>
running thread  <Thread(Thread-214, initial)>
running thread  <Thread(Thread-215, initial)>
running thread  <Thread(Thread-216, initial)>
running thread  <Thread(Thread-217, initial)>
running thread  <Thread(Thread-218, initial)>
running thread  <Thread(Thread-219, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.82it/s]


current loss: [[49.44844049]]
running thread  <Thread(Thread-220, initial)>
running thread  <Thread(Thread-221, initial)>
running thread  <Thread(Thread-222, initial)>
running thread  <Thread(Thread-223, initial)>
running thread  <Thread(Thread-224, initial)>
running thread  <Thread(Thread-225, initial)>
running thread  <Thread(Thread-226, initial)>
running thread  <Thread(Thread-227, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.61it/s]


current loss: [[48.57321588]]
running thread  <Thread(Thread-228, initial)>
running thread  <Thread(Thread-229, initial)>
running thread  <Thread(Thread-230, initial)>
running thread  <Thread(Thread-231, initial)>
running thread  <Thread(Thread-232, initial)>
running thread  <Thread(Thread-233, initial)>
running thread  <Thread(Thread-234, initial)>
running thread  <Thread(Thread-235, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.09it/s]


current loss: [[nan]]
running thread  <Thread(Thread-236, initial)>
running thread  <Thread(Thread-237, initial)>
running thread  <Thread(Thread-238, initial)>
running thread  <Thread(Thread-239, initial)>
running thread  <Thread(Thread-240, initial)>
running thread  <Thread(Thread-241, initial)>
running thread  <Thread(Thread-242, initial)>
running thread  <Thread(Thread-243, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 15.27it/s]


current loss: [[49.90130989]]
running thread  <Thread(Thread-244, initial)>
running thread  <Thread(Thread-245, initial)>
running thread  <Thread(Thread-246, initial)>
running thread  <Thread(Thread-247, initial)>
running thread  <Thread(Thread-248, initial)>
running thread  <Thread(Thread-249, initial)>
running thread  <Thread(Thread-250, initial)>
running thread  <Thread(Thread-251, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.00it/s]


current loss: [[54.53323233]]
running thread  <Thread(Thread-252, initial)>
running thread  <Thread(Thread-253, initial)>
running thread  <Thread(Thread-254, initial)>
running thread  <Thread(Thread-255, initial)>
running thread  <Thread(Thread-256, initial)>
running thread  <Thread(Thread-257, initial)>
running thread  <Thread(Thread-258, initial)>
running thread  <Thread(Thread-259, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.77it/s]


current loss: [[2.33684033e+194]]
running thread  <Thread(Thread-260, initial)>
running thread  <Thread(Thread-261, initial)>
running thread  <Thread(Thread-262, initial)>
running thread  <Thread(Thread-263, initial)>
running thread  <Thread(Thread-264, initial)>
running thread  <Thread(Thread-265, initial)>
running thread  <Thread(Thread-266, initial)>
running thread  <Thread(Thread-267, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.21it/s]


current loss: [[44.16683283]]
running thread  <Thread(Thread-268, initial)>
running thread  <Thread(Thread-269, initial)>
running thread  <Thread(Thread-270, initial)>
running thread  <Thread(Thread-271, initial)>
running thread  <Thread(Thread-272, initial)>
running thread  <Thread(Thread-273, initial)>
running thread  <Thread(Thread-274, initial)>
running thread  <Thread(Thread-275, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.23it/s]


current loss: [[nan]]
running thread  <Thread(Thread-276, initial)>
running thread  <Thread(Thread-277, initial)>
running thread  <Thread(Thread-278, initial)>
running thread  <Thread(Thread-279, initial)>
running thread  <Thread(Thread-280, initial)>
running thread  <Thread(Thread-281, initial)>
running thread  <Thread(Thread-282, initial)>
running thread  <Thread(Thread-283, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.62it/s]


current loss: [[47.93250533]]
running thread  <Thread(Thread-284, initial)>
running thread  <Thread(Thread-285, initial)>
running thread  <Thread(Thread-286, initial)>
running thread  <Thread(Thread-287, initial)>
running thread  <Thread(Thread-288, initial)>
running thread  <Thread(Thread-289, initial)>
running thread  <Thread(Thread-290, initial)>
running thread  <Thread(Thread-291, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.57it/s]


current loss: [[1.73542273e+301]]
running thread  <Thread(Thread-292, initial)>
running thread  <Thread(Thread-293, initial)>
running thread  <Thread(Thread-294, initial)>
running thread  <Thread(Thread-295, initial)>
running thread  <Thread(Thread-296, initial)>
running thread  <Thread(Thread-297, initial)>
running thread  <Thread(Thread-298, initial)>
running thread  <Thread(Thread-299, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.65it/s]


current loss: [[41.0685648]]
running thread  <Thread(Thread-300, initial)>
running thread  <Thread(Thread-301, initial)>
running thread  <Thread(Thread-302, initial)>
running thread  <Thread(Thread-303, initial)>
running thread  <Thread(Thread-304, initial)>
running thread  <Thread(Thread-305, initial)>
running thread  <Thread(Thread-306, initial)>
running thread  <Thread(Thread-307, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.60it/s]
  print("current loss: " + str((W2 @ sigmoid(W1 @ Xs) - Ys) @ (W2 @ sigmoid(W1 @ Xs) - Ys).transpose())) # report current loss
  error = (W2 @ sigmoid(W1 @ Xs_va) - Ys_va) @ (W2 @ sigmoid(W1 @ Xs_va) - Ys_va).transpose() # use 1 - R^2 as error to select hyperparameters


current loss: [[inf]]
running thread  <Thread(Thread-308, initial)>
running thread  <Thread(Thread-309, initial)>
running thread  <Thread(Thread-310, initial)>
running thread  <Thread(Thread-311, initial)>
running thread  <Thread(Thread-312, initial)>
running thread  <Thread(Thread-313, initial)>
running thread  <Thread(Thread-314, initial)>
running thread  <Thread(Thread-315, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 15.29it/s]


current loss: [[nan]]
running thread  <Thread(Thread-316, initial)>
running thread  <Thread(Thread-317, initial)>
running thread  <Thread(Thread-318, initial)>
running thread  <Thread(Thread-319, initial)>
running thread  <Thread(Thread-320, initial)>
running thread  <Thread(Thread-321, initial)>
running thread  <Thread(Thread-322, initial)>
running thread  <Thread(Thread-323, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.70it/s]


current loss: [[7.47770453e+88]]
running thread  <Thread(Thread-324, initial)>
running thread  <Thread(Thread-325, initial)>
running thread  <Thread(Thread-326, initial)>
running thread  <Thread(Thread-327, initial)>
running thread  <Thread(Thread-328, initial)>
running thread  <Thread(Thread-329, initial)>
running thread  <Thread(Thread-330, initial)>
running thread  <Thread(Thread-331, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.98it/s]


current loss: [[8.38824257e+98]]
running thread  <Thread(Thread-332, initial)>
running thread  <Thread(Thread-333, initial)>
running thread  <Thread(Thread-334, initial)>
running thread  <Thread(Thread-335, initial)>
running thread  <Thread(Thread-336, initial)>
running thread  <Thread(Thread-337, initial)>
running thread  <Thread(Thread-338, initial)>
running thread  <Thread(Thread-339, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.68it/s]


current loss: [[inf]]
running thread  <Thread(Thread-340, initial)>
running thread  <Thread(Thread-341, initial)>
running thread  <Thread(Thread-342, initial)>
running thread  <Thread(Thread-343, initial)>
running thread  <Thread(Thread-344, initial)>
running thread  <Thread(Thread-345, initial)>
running thread  <Thread(Thread-346, initial)>
running thread  <Thread(Thread-347, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 15.75it/s]


current loss: [[4.80418651e+206]]
running thread  <Thread(Thread-348, initial)>
running thread  <Thread(Thread-349, initial)>
running thread  <Thread(Thread-350, initial)>
running thread  <Thread(Thread-351, initial)>
running thread  <Thread(Thread-352, initial)>
running thread  <Thread(Thread-353, initial)>
running thread  <Thread(Thread-354, initial)>
running thread  <Thread(Thread-355, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.59it/s]


current loss: [[48.25762874]]
running thread  <Thread(Thread-356, initial)>
running thread  <Thread(Thread-357, initial)>
running thread  <Thread(Thread-358, initial)>
running thread  <Thread(Thread-359, initial)>
running thread  <Thread(Thread-360, initial)>
running thread  <Thread(Thread-361, initial)>
running thread  <Thread(Thread-362, initial)>
running thread  <Thread(Thread-363, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.43it/s]


current loss: [[4.1522796e+104]]
running thread  <Thread(Thread-364, initial)>
running thread  <Thread(Thread-365, initial)>
running thread  <Thread(Thread-366, initial)>
running thread  <Thread(Thread-367, initial)>
running thread  <Thread(Thread-368, initial)>
running thread  <Thread(Thread-369, initial)>
running thread  <Thread(Thread-370, initial)>
running thread  <Thread(Thread-371, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.75it/s]


current loss: [[2.01651983e+296]]
running thread  <Thread(Thread-372, initial)>
running thread  <Thread(Thread-373, initial)>
running thread  <Thread(Thread-374, initial)>
running thread  <Thread(Thread-375, initial)>
running thread  <Thread(Thread-376, initial)>
running thread  <Thread(Thread-377, initial)>
running thread  <Thread(Thread-378, initial)>
running thread  <Thread(Thread-379, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.39it/s]


current loss: [[nan]]
running thread  <Thread(Thread-380, initial)>
running thread  <Thread(Thread-381, initial)>
running thread  <Thread(Thread-382, initial)>
running thread  <Thread(Thread-383, initial)>
running thread  <Thread(Thread-384, initial)>
running thread  <Thread(Thread-385, initial)>
running thread  <Thread(Thread-386, initial)>
running thread  <Thread(Thread-387, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.64it/s]


current loss: [[49.18744277]]
running thread  <Thread(Thread-388, initial)>
running thread  <Thread(Thread-389, initial)>
running thread  <Thread(Thread-390, initial)>
running thread  <Thread(Thread-391, initial)>
running thread  <Thread(Thread-392, initial)>
running thread  <Thread(Thread-393, initial)>
running thread  <Thread(Thread-394, initial)>
running thread  <Thread(Thread-395, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.78it/s]


current loss: [[4.05707751e+175]]
running thread  <Thread(Thread-396, initial)>
running thread  <Thread(Thread-397, initial)>
running thread  <Thread(Thread-398, initial)>
running thread  <Thread(Thread-399, initial)>
running thread  <Thread(Thread-400, initial)>
running thread  <Thread(Thread-401, initial)>
running thread  <Thread(Thread-402, initial)>
running thread  <Thread(Thread-403, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.36it/s]


current loss: [[44.96368565]]
running thread  <Thread(Thread-404, initial)>
running thread  <Thread(Thread-405, initial)>
running thread  <Thread(Thread-406, initial)>
running thread  <Thread(Thread-407, initial)>
running thread  <Thread(Thread-408, initial)>
running thread  <Thread(Thread-409, initial)>
running thread  <Thread(Thread-410, initial)>
running thread  <Thread(Thread-411, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.00it/s]


current loss: [[43.87955532]]
running thread  <Thread(Thread-412, initial)>
running thread  <Thread(Thread-413, initial)>
running thread  <Thread(Thread-414, initial)>
running thread  <Thread(Thread-415, initial)>
running thread  <Thread(Thread-416, initial)>
running thread  <Thread(Thread-417, initial)>
running thread  <Thread(Thread-418, initial)>
running thread  <Thread(Thread-419, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.20it/s]


current loss: [[1.30064272e+273]]
running thread  <Thread(Thread-420, initial)>
running thread  <Thread(Thread-421, initial)>
running thread  <Thread(Thread-422, initial)>
running thread  <Thread(Thread-423, initial)>
running thread  <Thread(Thread-424, initial)>
running thread  <Thread(Thread-425, initial)>
running thread  <Thread(Thread-426, initial)>
running thread  <Thread(Thread-427, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.92it/s]


current loss: [[46.9106903]]
running thread  <Thread(Thread-428, initial)>
running thread  <Thread(Thread-429, initial)>
running thread  <Thread(Thread-430, initial)>
running thread  <Thread(Thread-431, initial)>
running thread  <Thread(Thread-432, initial)>
running thread  <Thread(Thread-433, initial)>
running thread  <Thread(Thread-434, initial)>
running thread  <Thread(Thread-435, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.67it/s]


current loss: [[4.63507674e+292]]
running thread  <Thread(Thread-436, initial)>
running thread  <Thread(Thread-437, initial)>
running thread  <Thread(Thread-438, initial)>
running thread  <Thread(Thread-439, initial)>
running thread  <Thread(Thread-440, initial)>
running thread  <Thread(Thread-441, initial)>
running thread  <Thread(Thread-442, initial)>
running thread  <Thread(Thread-443, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.58it/s]


current loss: [[1.55264637e+08]]
running thread  <Thread(Thread-444, initial)>
running thread  <Thread(Thread-445, initial)>
running thread  <Thread(Thread-446, initial)>
running thread  <Thread(Thread-447, initial)>
running thread  <Thread(Thread-448, initial)>
running thread  <Thread(Thread-449, initial)>
running thread  <Thread(Thread-450, initial)>
running thread  <Thread(Thread-451, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.95it/s]


current loss: [[1.63459907e+124]]
running thread  <Thread(Thread-452, initial)>
running thread  <Thread(Thread-453, initial)>
running thread  <Thread(Thread-454, initial)>
running thread  <Thread(Thread-455, initial)>
running thread  <Thread(Thread-456, initial)>
running thread  <Thread(Thread-457, initial)>
running thread  <Thread(Thread-458, initial)>
running thread  <Thread(Thread-459, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.54it/s]


current loss: [[46.52477853]]
running thread  <Thread(Thread-460, initial)>
running thread  <Thread(Thread-461, initial)>
running thread  <Thread(Thread-462, initial)>
running thread  <Thread(Thread-463, initial)>
running thread  <Thread(Thread-464, initial)>
running thread  <Thread(Thread-465, initial)>
running thread  <Thread(Thread-466, initial)>
running thread  <Thread(Thread-467, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.63it/s]


current loss: [[4.98433962e+52]]
running thread  <Thread(Thread-468, initial)>
running thread  <Thread(Thread-469, initial)>
running thread  <Thread(Thread-470, initial)>
running thread  <Thread(Thread-471, initial)>
running thread  <Thread(Thread-472, initial)>
running thread  <Thread(Thread-473, initial)>
running thread  <Thread(Thread-474, initial)>
running thread  <Thread(Thread-475, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.69it/s]


current loss: [[2.3321929e+174]]
running thread  <Thread(Thread-476, initial)>
running thread  <Thread(Thread-477, initial)>
running thread  <Thread(Thread-478, initial)>
running thread  <Thread(Thread-479, initial)>
running thread  <Thread(Thread-480, initial)>
running thread  <Thread(Thread-481, initial)>
running thread  <Thread(Thread-482, initial)>
running thread  <Thread(Thread-483, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.55it/s]


current loss: [[4.53596442e+191]]
running thread  <Thread(Thread-484, initial)>
running thread  <Thread(Thread-485, initial)>
running thread  <Thread(Thread-486, initial)>
running thread  <Thread(Thread-487, initial)>
running thread  <Thread(Thread-488, initial)>
running thread  <Thread(Thread-489, initial)>
running thread  <Thread(Thread-490, initial)>
running thread  <Thread(Thread-491, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.69it/s]


current loss: [[1.08752504e+157]]
running thread  <Thread(Thread-492, initial)>
running thread  <Thread(Thread-493, initial)>
running thread  <Thread(Thread-494, initial)>
running thread  <Thread(Thread-495, initial)>
running thread  <Thread(Thread-496, initial)>
running thread  <Thread(Thread-497, initial)>
running thread  <Thread(Thread-498, initial)>
running thread  <Thread(Thread-499, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.49it/s]


current loss: [[48.05240327]]
running thread  <Thread(Thread-500, initial)>
running thread  <Thread(Thread-501, initial)>
running thread  <Thread(Thread-502, initial)>
running thread  <Thread(Thread-503, initial)>
running thread  <Thread(Thread-504, initial)>
running thread  <Thread(Thread-505, initial)>
running thread  <Thread(Thread-506, initial)>
running thread  <Thread(Thread-507, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.78it/s]


current loss: [[1.83134786e+112]]
running thread  <Thread(Thread-508, initial)>
running thread  <Thread(Thread-509, initial)>
running thread  <Thread(Thread-510, initial)>
running thread  <Thread(Thread-511, initial)>
running thread  <Thread(Thread-512, initial)>
running thread  <Thread(Thread-513, initial)>
running thread  <Thread(Thread-514, initial)>
running thread  <Thread(Thread-515, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 15.01it/s]


current loss: [[2.71473129e+104]]
running thread  <Thread(Thread-516, initial)>
running thread  <Thread(Thread-517, initial)>
running thread  <Thread(Thread-518, initial)>
running thread  <Thread(Thread-519, initial)>
running thread  <Thread(Thread-520, initial)>
running thread  <Thread(Thread-521, initial)>
running thread  <Thread(Thread-522, initial)>
running thread  <Thread(Thread-523, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 12.72it/s]


current loss: [[2.97794012e+61]]
running thread  <Thread(Thread-524, initial)>
running thread  <Thread(Thread-525, initial)>
running thread  <Thread(Thread-526, initial)>
running thread  <Thread(Thread-527, initial)>
running thread  <Thread(Thread-528, initial)>
running thread  <Thread(Thread-529, initial)>
running thread  <Thread(Thread-530, initial)>
running thread  <Thread(Thread-531, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.15it/s]


current loss: [[nan]]
running thread  <Thread(Thread-532, initial)>
running thread  <Thread(Thread-533, initial)>
running thread  <Thread(Thread-534, initial)>
running thread  <Thread(Thread-535, initial)>
running thread  <Thread(Thread-536, initial)>
running thread  <Thread(Thread-537, initial)>
running thread  <Thread(Thread-538, initial)>
running thread  <Thread(Thread-539, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.51it/s]


current loss: [[49.90130989]]
running thread  <Thread(Thread-540, initial)>
running thread  <Thread(Thread-541, initial)>
running thread  <Thread(Thread-542, initial)>
running thread  <Thread(Thread-543, initial)>
running thread  <Thread(Thread-544, initial)>
running thread  <Thread(Thread-545, initial)>
running thread  <Thread(Thread-546, initial)>
running thread  <Thread(Thread-547, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.61it/s]


current loss: [[47.91907778]]
running thread  <Thread(Thread-548, initial)>
running thread  <Thread(Thread-549, initial)>
running thread  <Thread(Thread-550, initial)>
running thread  <Thread(Thread-551, initial)>
running thread  <Thread(Thread-552, initial)>
running thread  <Thread(Thread-553, initial)>
running thread  <Thread(Thread-554, initial)>
running thread  <Thread(Thread-555, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.88it/s]


current loss: [[49.48981913]]
running thread  <Thread(Thread-556, initial)>
running thread  <Thread(Thread-557, initial)>
running thread  <Thread(Thread-558, initial)>
running thread  <Thread(Thread-559, initial)>
running thread  <Thread(Thread-560, initial)>
running thread  <Thread(Thread-561, initial)>
running thread  <Thread(Thread-562, initial)>
running thread  <Thread(Thread-563, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.93it/s]


current loss: [[1.18626649e+106]]
running thread  <Thread(Thread-564, initial)>
running thread  <Thread(Thread-565, initial)>
running thread  <Thread(Thread-566, initial)>
running thread  <Thread(Thread-567, initial)>
running thread  <Thread(Thread-568, initial)>
running thread  <Thread(Thread-569, initial)>
running thread  <Thread(Thread-570, initial)>
running thread  <Thread(Thread-571, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 13.29it/s]


current loss: [[49.90130989]]
running thread  <Thread(Thread-572, initial)>
running thread  <Thread(Thread-573, initial)>
running thread  <Thread(Thread-574, initial)>
running thread  <Thread(Thread-575, initial)>
running thread  <Thread(Thread-576, initial)>
running thread  <Thread(Thread-577, initial)>
running thread  <Thread(Thread-578, initial)>
running thread  <Thread(Thread-579, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.75it/s]


current loss: [[45.20959919]]
running thread  <Thread(Thread-580, initial)>
running thread  <Thread(Thread-581, initial)>
running thread  <Thread(Thread-582, initial)>
running thread  <Thread(Thread-583, initial)>
running thread  <Thread(Thread-584, initial)>
running thread  <Thread(Thread-585, initial)>
running thread  <Thread(Thread-586, initial)>
running thread  <Thread(Thread-587, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.57it/s]


current loss: [[49.73573789]]
running thread  <Thread(Thread-588, initial)>
running thread  <Thread(Thread-589, initial)>
running thread  <Thread(Thread-590, initial)>
running thread  <Thread(Thread-591, initial)>
running thread  <Thread(Thread-592, initial)>
running thread  <Thread(Thread-593, initial)>
running thread  <Thread(Thread-594, initial)>
running thread  <Thread(Thread-595, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.21it/s]


current loss: [[45.09635622]]
running thread  <Thread(Thread-596, initial)>
running thread  <Thread(Thread-597, initial)>
running thread  <Thread(Thread-598, initial)>
running thread  <Thread(Thread-599, initial)>
running thread  <Thread(Thread-600, initial)>
running thread  <Thread(Thread-601, initial)>
running thread  <Thread(Thread-602, initial)>
running thread  <Thread(Thread-603, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.60it/s]


current loss: [[47.45584609]]
running thread  <Thread(Thread-604, initial)>
running thread  <Thread(Thread-605, initial)>
running thread  <Thread(Thread-606, initial)>
running thread  <Thread(Thread-607, initial)>
running thread  <Thread(Thread-608, initial)>
running thread  <Thread(Thread-609, initial)>
running thread  <Thread(Thread-610, initial)>
running thread  <Thread(Thread-611, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 12.77it/s]


current loss: [[4.24541206e+211]]
running thread  <Thread(Thread-612, initial)>
running thread  <Thread(Thread-613, initial)>
running thread  <Thread(Thread-614, initial)>
running thread  <Thread(Thread-615, initial)>
running thread  <Thread(Thread-616, initial)>
running thread  <Thread(Thread-617, initial)>
running thread  <Thread(Thread-618, initial)>
running thread  <Thread(Thread-619, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 13.12it/s]


current loss: [[45.30190356]]
running thread  <Thread(Thread-620, initial)>
running thread  <Thread(Thread-621, initial)>
running thread  <Thread(Thread-622, initial)>
running thread  <Thread(Thread-623, initial)>
running thread  <Thread(Thread-624, initial)>
running thread  <Thread(Thread-625, initial)>
running thread  <Thread(Thread-626, initial)>
running thread  <Thread(Thread-627, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.91it/s]


current loss: [[7.19649247e+163]]
running thread  <Thread(Thread-628, initial)>
running thread  <Thread(Thread-629, initial)>
running thread  <Thread(Thread-630, initial)>
running thread  <Thread(Thread-631, initial)>
running thread  <Thread(Thread-632, initial)>
running thread  <Thread(Thread-633, initial)>
running thread  <Thread(Thread-634, initial)>
running thread  <Thread(Thread-635, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.03it/s]


current loss: [[2.34958513e+148]]
running thread  <Thread(Thread-636, initial)>
running thread  <Thread(Thread-637, initial)>
running thread  <Thread(Thread-638, initial)>
running thread  <Thread(Thread-639, initial)>
running thread  <Thread(Thread-640, initial)>
running thread  <Thread(Thread-641, initial)>
running thread  <Thread(Thread-642, initial)>
running thread  <Thread(Thread-643, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.21it/s]


current loss: [[nan]]
running thread  <Thread(Thread-644, initial)>
running thread  <Thread(Thread-645, initial)>
running thread  <Thread(Thread-646, initial)>
running thread  <Thread(Thread-647, initial)>
running thread  <Thread(Thread-648, initial)>
running thread  <Thread(Thread-649, initial)>
running thread  <Thread(Thread-650, initial)>
running thread  <Thread(Thread-651, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.84it/s]


current loss: [[5.50059187e+150]]
running thread  <Thread(Thread-652, initial)>
running thread  <Thread(Thread-653, initial)>
running thread  <Thread(Thread-654, initial)>
running thread  <Thread(Thread-655, initial)>
running thread  <Thread(Thread-656, initial)>
running thread  <Thread(Thread-657, initial)>
running thread  <Thread(Thread-658, initial)>
running thread  <Thread(Thread-659, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.61it/s]


current loss: [[45.64983455]]
running thread  <Thread(Thread-660, initial)>
running thread  <Thread(Thread-661, initial)>
running thread  <Thread(Thread-662, initial)>
running thread  <Thread(Thread-663, initial)>
running thread  <Thread(Thread-664, initial)>
running thread  <Thread(Thread-665, initial)>
running thread  <Thread(Thread-666, initial)>
running thread  <Thread(Thread-667, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.24it/s]


current loss: [[49.90130989]]
running thread  <Thread(Thread-668, initial)>
running thread  <Thread(Thread-669, initial)>
running thread  <Thread(Thread-670, initial)>
running thread  <Thread(Thread-671, initial)>
running thread  <Thread(Thread-672, initial)>
running thread  <Thread(Thread-673, initial)>
running thread  <Thread(Thread-674, initial)>
running thread  <Thread(Thread-675, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.49it/s]


current loss: [[49.46689304]]
running thread  <Thread(Thread-676, initial)>
running thread  <Thread(Thread-677, initial)>
running thread  <Thread(Thread-678, initial)>
running thread  <Thread(Thread-679, initial)>
running thread  <Thread(Thread-680, initial)>
running thread  <Thread(Thread-681, initial)>
running thread  <Thread(Thread-682, initial)>
running thread  <Thread(Thread-683, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 12.96it/s]


current loss: [[1.28088587e+275]]
running thread  <Thread(Thread-684, initial)>
running thread  <Thread(Thread-685, initial)>
running thread  <Thread(Thread-686, initial)>
running thread  <Thread(Thread-687, initial)>
running thread  <Thread(Thread-688, initial)>
running thread  <Thread(Thread-689, initial)>
running thread  <Thread(Thread-690, initial)>
running thread  <Thread(Thread-691, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.94it/s]


current loss: [[46.94985437]]
running thread  <Thread(Thread-692, initial)>
running thread  <Thread(Thread-693, initial)>
running thread  <Thread(Thread-694, initial)>
running thread  <Thread(Thread-695, initial)>
running thread  <Thread(Thread-696, initial)>
running thread  <Thread(Thread-697, initial)>
running thread  <Thread(Thread-698, initial)>
running thread  <Thread(Thread-699, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.55it/s]


current loss: [[nan]]
running thread  <Thread(Thread-700, initial)>
running thread  <Thread(Thread-701, initial)>
running thread  <Thread(Thread-702, initial)>
running thread  <Thread(Thread-703, initial)>
running thread  <Thread(Thread-704, initial)>
running thread  <Thread(Thread-705, initial)>
running thread  <Thread(Thread-706, initial)>
running thread  <Thread(Thread-707, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.94it/s]


current loss: [[7.60609651e+278]]
running thread  <Thread(Thread-708, initial)>
running thread  <Thread(Thread-709, initial)>
running thread  <Thread(Thread-710, initial)>
running thread  <Thread(Thread-711, initial)>
running thread  <Thread(Thread-712, initial)>
running thread  <Thread(Thread-713, initial)>
running thread  <Thread(Thread-714, initial)>
running thread  <Thread(Thread-715, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.19it/s]


current loss: [[3.94120365e+172]]
running thread  <Thread(Thread-716, initial)>
running thread  <Thread(Thread-717, initial)>
running thread  <Thread(Thread-718, initial)>
running thread  <Thread(Thread-719, initial)>
running thread  <Thread(Thread-720, initial)>
running thread  <Thread(Thread-721, initial)>
running thread  <Thread(Thread-722, initial)>
running thread  <Thread(Thread-723, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.16it/s]


current loss: [[4.087119e+231]]
running thread  <Thread(Thread-724, initial)>
running thread  <Thread(Thread-725, initial)>
running thread  <Thread(Thread-726, initial)>
running thread  <Thread(Thread-727, initial)>
running thread  <Thread(Thread-728, initial)>
running thread  <Thread(Thread-729, initial)>
running thread  <Thread(Thread-730, initial)>
running thread  <Thread(Thread-731, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.90it/s]


current loss: [[1.33901684e+35]]
running thread  <Thread(Thread-732, initial)>
running thread  <Thread(Thread-733, initial)>
running thread  <Thread(Thread-734, initial)>
running thread  <Thread(Thread-735, initial)>
running thread  <Thread(Thread-736, initial)>
running thread  <Thread(Thread-737, initial)>
running thread  <Thread(Thread-738, initial)>
running thread  <Thread(Thread-739, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 15.29it/s]


current loss: [[49.1829318]]
running thread  <Thread(Thread-740, initial)>
running thread  <Thread(Thread-741, initial)>
running thread  <Thread(Thread-742, initial)>
running thread  <Thread(Thread-743, initial)>
running thread  <Thread(Thread-744, initial)>
running thread  <Thread(Thread-745, initial)>
running thread  <Thread(Thread-746, initial)>
running thread  <Thread(Thread-747, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.58it/s]


current loss: [[8.06086224e+57]]
running thread  <Thread(Thread-748, initial)>
running thread  <Thread(Thread-749, initial)>
running thread  <Thread(Thread-750, initial)>
running thread  <Thread(Thread-751, initial)>
running thread  <Thread(Thread-752, initial)>
running thread  <Thread(Thread-753, initial)>
running thread  <Thread(Thread-754, initial)>
running thread  <Thread(Thread-755, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.12it/s]


current loss: [[nan]]
running thread  <Thread(Thread-756, initial)>
running thread  <Thread(Thread-757, initial)>
running thread  <Thread(Thread-758, initial)>
running thread  <Thread(Thread-759, initial)>
running thread  <Thread(Thread-760, initial)>
running thread  <Thread(Thread-761, initial)>
running thread  <Thread(Thread-762, initial)>
running thread  <Thread(Thread-763, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.05it/s]


current loss: [[nan]]
running thread  <Thread(Thread-764, initial)>
running thread  <Thread(Thread-765, initial)>
running thread  <Thread(Thread-766, initial)>
running thread  <Thread(Thread-767, initial)>
running thread  <Thread(Thread-768, initial)>
running thread  <Thread(Thread-769, initial)>
running thread  <Thread(Thread-770, initial)>
running thread  <Thread(Thread-771, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.74it/s]


current loss: [[45.02848964]]
running thread  <Thread(Thread-772, initial)>
running thread  <Thread(Thread-773, initial)>
running thread  <Thread(Thread-774, initial)>
running thread  <Thread(Thread-775, initial)>
running thread  <Thread(Thread-776, initial)>
running thread  <Thread(Thread-777, initial)>
running thread  <Thread(Thread-778, initial)>
running thread  <Thread(Thread-779, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.39it/s]


current loss: [[nan]]
running thread  <Thread(Thread-780, initial)>
running thread  <Thread(Thread-781, initial)>
running thread  <Thread(Thread-782, initial)>
running thread  <Thread(Thread-783, initial)>
running thread  <Thread(Thread-784, initial)>
running thread  <Thread(Thread-785, initial)>
running thread  <Thread(Thread-786, initial)>
running thread  <Thread(Thread-787, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.99it/s]


current loss: [[1.26378155e+69]]
running thread  <Thread(Thread-788, initial)>
running thread  <Thread(Thread-789, initial)>
running thread  <Thread(Thread-790, initial)>
running thread  <Thread(Thread-791, initial)>
running thread  <Thread(Thread-792, initial)>
running thread  <Thread(Thread-793, initial)>
running thread  <Thread(Thread-794, initial)>
running thread  <Thread(Thread-795, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.37it/s]


current loss: [[7.78605936e+92]]
running thread  <Thread(Thread-796, initial)>
running thread  <Thread(Thread-797, initial)>
running thread  <Thread(Thread-798, initial)>
running thread  <Thread(Thread-799, initial)>
running thread  <Thread(Thread-800, initial)>
running thread  <Thread(Thread-801, initial)>
running thread  <Thread(Thread-802, initial)>
running thread  <Thread(Thread-803, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.13it/s]


current loss: [[2.51853361e+129]]
running thread  <Thread(Thread-804, initial)>
running thread  <Thread(Thread-805, initial)>
running thread  <Thread(Thread-806, initial)>
running thread  <Thread(Thread-807, initial)>
running thread  <Thread(Thread-808, initial)>
running thread  <Thread(Thread-809, initial)>
running thread  <Thread(Thread-810, initial)>
running thread  <Thread(Thread-811, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.92it/s]


current loss: [[3.79212734e+273]]
running thread  <Thread(Thread-812, initial)>
running thread  <Thread(Thread-813, initial)>
running thread  <Thread(Thread-814, initial)>
running thread  <Thread(Thread-815, initial)>
running thread  <Thread(Thread-816, initial)>
running thread  <Thread(Thread-817, initial)>
running thread  <Thread(Thread-818, initial)>
running thread  <Thread(Thread-819, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.74it/s]


current loss: [[4.21552295e+151]]
running thread  <Thread(Thread-820, initial)>
running thread  <Thread(Thread-821, initial)>
running thread  <Thread(Thread-822, initial)>
running thread  <Thread(Thread-823, initial)>
running thread  <Thread(Thread-824, initial)>
running thread  <Thread(Thread-825, initial)>
running thread  <Thread(Thread-826, initial)>
running thread  <Thread(Thread-827, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.38it/s]


current loss: [[46.18873732]]
running thread  <Thread(Thread-828, initial)>
running thread  <Thread(Thread-829, initial)>
running thread  <Thread(Thread-830, initial)>
running thread  <Thread(Thread-831, initial)>
running thread  <Thread(Thread-832, initial)>
running thread  <Thread(Thread-833, initial)>
running thread  <Thread(Thread-834, initial)>
running thread  <Thread(Thread-835, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.72it/s]


current loss: [[47.90089042]]
running thread  <Thread(Thread-836, initial)>
running thread  <Thread(Thread-837, initial)>
running thread  <Thread(Thread-838, initial)>
running thread  <Thread(Thread-839, initial)>
running thread  <Thread(Thread-840, initial)>
running thread  <Thread(Thread-841, initial)>
running thread  <Thread(Thread-842, initial)>
running thread  <Thread(Thread-843, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.50it/s]


current loss: [[49.93538929]]
running thread  <Thread(Thread-844, initial)>
running thread  <Thread(Thread-845, initial)>
running thread  <Thread(Thread-846, initial)>
running thread  <Thread(Thread-847, initial)>
running thread  <Thread(Thread-848, initial)>
running thread  <Thread(Thread-849, initial)>
running thread  <Thread(Thread-850, initial)>
running thread  <Thread(Thread-851, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.31it/s]


current loss: [[7.27390761e+121]]
running thread  <Thread(Thread-852, initial)>
running thread  <Thread(Thread-853, initial)>
running thread  <Thread(Thread-854, initial)>
running thread  <Thread(Thread-855, initial)>
running thread  <Thread(Thread-856, initial)>
running thread  <Thread(Thread-857, initial)>
running thread  <Thread(Thread-858, initial)>
running thread  <Thread(Thread-859, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.19it/s]


current loss: [[45.57555012]]
running thread  <Thread(Thread-860, initial)>
running thread  <Thread(Thread-861, initial)>
running thread  <Thread(Thread-862, initial)>
running thread  <Thread(Thread-863, initial)>
running thread  <Thread(Thread-864, initial)>
running thread  <Thread(Thread-865, initial)>
running thread  <Thread(Thread-866, initial)>
running thread  <Thread(Thread-867, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.65it/s]


current loss: [[1.53613808e+180]]
running thread  <Thread(Thread-868, initial)>
running thread  <Thread(Thread-869, initial)>
running thread  <Thread(Thread-870, initial)>
running thread  <Thread(Thread-871, initial)>
running thread  <Thread(Thread-872, initial)>
running thread  <Thread(Thread-873, initial)>
running thread  <Thread(Thread-874, initial)>
running thread  <Thread(Thread-875, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.16it/s]


current loss: [[3.53846099e+16]]
running thread  <Thread(Thread-876, initial)>
running thread  <Thread(Thread-877, initial)>
running thread  <Thread(Thread-878, initial)>
running thread  <Thread(Thread-879, initial)>
running thread  <Thread(Thread-880, initial)>
running thread  <Thread(Thread-881, initial)>
running thread  <Thread(Thread-882, initial)>
running thread  <Thread(Thread-883, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.44it/s]


current loss: [[3.32666634e+141]]
running thread  <Thread(Thread-884, initial)>
running thread  <Thread(Thread-885, initial)>
running thread  <Thread(Thread-886, initial)>
running thread  <Thread(Thread-887, initial)>
running thread  <Thread(Thread-888, initial)>
running thread  <Thread(Thread-889, initial)>
running thread  <Thread(Thread-890, initial)>
running thread  <Thread(Thread-891, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 15.41it/s]


current loss: [[49.90102123]]
running thread  <Thread(Thread-892, initial)>
running thread  <Thread(Thread-893, initial)>
running thread  <Thread(Thread-894, initial)>
running thread  <Thread(Thread-895, initial)>
running thread  <Thread(Thread-896, initial)>
running thread  <Thread(Thread-897, initial)>
running thread  <Thread(Thread-898, initial)>
running thread  <Thread(Thread-899, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.51it/s]


current loss: [[49.47042631]]
running thread  <Thread(Thread-900, initial)>
running thread  <Thread(Thread-901, initial)>
running thread  <Thread(Thread-902, initial)>
running thread  <Thread(Thread-903, initial)>
running thread  <Thread(Thread-904, initial)>
running thread  <Thread(Thread-905, initial)>
running thread  <Thread(Thread-906, initial)>
running thread  <Thread(Thread-907, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.95it/s]


current loss: [[nan]]
running thread  <Thread(Thread-908, initial)>
running thread  <Thread(Thread-909, initial)>
running thread  <Thread(Thread-910, initial)>
running thread  <Thread(Thread-911, initial)>
running thread  <Thread(Thread-912, initial)>
running thread  <Thread(Thread-913, initial)>
running thread  <Thread(Thread-914, initial)>
running thread  <Thread(Thread-915, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.46it/s]


current loss: [[1.50177251e+67]]
running thread  <Thread(Thread-916, initial)>
running thread  <Thread(Thread-917, initial)>
running thread  <Thread(Thread-918, initial)>
running thread  <Thread(Thread-919, initial)>
running thread  <Thread(Thread-920, initial)>
running thread  <Thread(Thread-921, initial)>
running thread  <Thread(Thread-922, initial)>
running thread  <Thread(Thread-923, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.67it/s]


current loss: [[2.45835355e+245]]
running thread  <Thread(Thread-924, initial)>
running thread  <Thread(Thread-925, initial)>
running thread  <Thread(Thread-926, initial)>
running thread  <Thread(Thread-927, initial)>
running thread  <Thread(Thread-928, initial)>
running thread  <Thread(Thread-929, initial)>
running thread  <Thread(Thread-930, initial)>
running thread  <Thread(Thread-931, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.51it/s]


current loss: [[nan]]
running thread  <Thread(Thread-932, initial)>
running thread  <Thread(Thread-933, initial)>
running thread  <Thread(Thread-934, initial)>
running thread  <Thread(Thread-935, initial)>
running thread  <Thread(Thread-936, initial)>
running thread  <Thread(Thread-937, initial)>
running thread  <Thread(Thread-938, initial)>
running thread  <Thread(Thread-939, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 15.59it/s]


current loss: [[nan]]
running thread  <Thread(Thread-940, initial)>
running thread  <Thread(Thread-941, initial)>
running thread  <Thread(Thread-942, initial)>
running thread  <Thread(Thread-943, initial)>
running thread  <Thread(Thread-944, initial)>
running thread  <Thread(Thread-945, initial)>
running thread  <Thread(Thread-946, initial)>
running thread  <Thread(Thread-947, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.48it/s]


current loss: [[1.44125859e+206]]
running thread  <Thread(Thread-948, initial)>
running thread  <Thread(Thread-949, initial)>
running thread  <Thread(Thread-950, initial)>
running thread  <Thread(Thread-951, initial)>
running thread  <Thread(Thread-952, initial)>
running thread  <Thread(Thread-953, initial)>
running thread  <Thread(Thread-954, initial)>
running thread  <Thread(Thread-955, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.03it/s]


current loss: [[8.37957127e+168]]
running thread  <Thread(Thread-956, initial)>
running thread  <Thread(Thread-957, initial)>
running thread  <Thread(Thread-958, initial)>
running thread  <Thread(Thread-959, initial)>
running thread  <Thread(Thread-960, initial)>
running thread  <Thread(Thread-961, initial)>
running thread  <Thread(Thread-962, initial)>
running thread  <Thread(Thread-963, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.81it/s]


current loss: [[46.14478078]]
running thread  <Thread(Thread-964, initial)>
running thread  <Thread(Thread-965, initial)>
running thread  <Thread(Thread-966, initial)>
running thread  <Thread(Thread-967, initial)>
running thread  <Thread(Thread-968, initial)>
running thread  <Thread(Thread-969, initial)>
running thread  <Thread(Thread-970, initial)>
running thread  <Thread(Thread-971, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.74it/s]


current loss: [[1.25709272e+71]]
running thread  <Thread(Thread-972, initial)>
running thread  <Thread(Thread-973, initial)>
running thread  <Thread(Thread-974, initial)>
running thread  <Thread(Thread-975, initial)>
running thread  <Thread(Thread-976, initial)>
running thread  <Thread(Thread-977, initial)>
running thread  <Thread(Thread-978, initial)>
running thread  <Thread(Thread-979, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.66it/s]


current loss: [[1.39625781e+39]]
running thread  <Thread(Thread-980, initial)>
running thread  <Thread(Thread-981, initial)>
running thread  <Thread(Thread-982, initial)>
running thread  <Thread(Thread-983, initial)>
running thread  <Thread(Thread-984, initial)>
running thread  <Thread(Thread-985, initial)>
running thread  <Thread(Thread-986, initial)>
running thread  <Thread(Thread-987, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.79it/s]


current loss: [[5.98068932e+252]]
running thread  <Thread(Thread-988, initial)>
running thread  <Thread(Thread-989, initial)>
running thread  <Thread(Thread-990, initial)>
running thread  <Thread(Thread-991, initial)>
running thread  <Thread(Thread-992, initial)>
running thread  <Thread(Thread-993, initial)>
running thread  <Thread(Thread-994, initial)>
running thread  <Thread(Thread-995, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.28it/s]


current loss: [[49.38707217]]
running thread  <Thread(Thread-996, initial)>
running thread  <Thread(Thread-997, initial)>
running thread  <Thread(Thread-998, initial)>
running thread  <Thread(Thread-999, initial)>
running thread  <Thread(Thread-1000, initial)>
running thread  <Thread(Thread-1001, initial)>
running thread  <Thread(Thread-1002, initial)>
running thread  <Thread(Thread-1003, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.61it/s]


current loss: [[5.545759e+122]]
running thread  <Thread(Thread-1004, initial)>
running thread  <Thread(Thread-1005, initial)>
running thread  <Thread(Thread-1006, initial)>
running thread  <Thread(Thread-1007, initial)>
running thread  <Thread(Thread-1008, initial)>
running thread  <Thread(Thread-1009, initial)>
running thread  <Thread(Thread-1010, initial)>
running thread  <Thread(Thread-1011, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.80it/s]


current loss: [[45.21844205]]
running thread  <Thread(Thread-1012, initial)>
running thread  <Thread(Thread-1013, initial)>
running thread  <Thread(Thread-1014, initial)>
running thread  <Thread(Thread-1015, initial)>
running thread  <Thread(Thread-1016, initial)>
running thread  <Thread(Thread-1017, initial)>
running thread  <Thread(Thread-1018, initial)>
running thread  <Thread(Thread-1019, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 13.11it/s]


current loss: [[7.79624072e+157]]
running thread  <Thread(Thread-1020, initial)>
running thread  <Thread(Thread-1021, initial)>
running thread  <Thread(Thread-1022, initial)>
running thread  <Thread(Thread-1023, initial)>
running thread  <Thread(Thread-1024, initial)>
running thread  <Thread(Thread-1025, initial)>
running thread  <Thread(Thread-1026, initial)>
running thread  <Thread(Thread-1027, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 15.51it/s]


current loss: [[47.83850855]]
running thread  <Thread(Thread-1028, initial)>
running thread  <Thread(Thread-1029, initial)>
running thread  <Thread(Thread-1030, initial)>
running thread  <Thread(Thread-1031, initial)>
running thread  <Thread(Thread-1032, initial)>
running thread  <Thread(Thread-1033, initial)>
running thread  <Thread(Thread-1034, initial)>
running thread  <Thread(Thread-1035, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.24it/s]


current loss: [[49.90130989]]
running thread  <Thread(Thread-1036, initial)>
running thread  <Thread(Thread-1037, initial)>
running thread  <Thread(Thread-1038, initial)>
running thread  <Thread(Thread-1039, initial)>
running thread  <Thread(Thread-1040, initial)>
running thread  <Thread(Thread-1041, initial)>
running thread  <Thread(Thread-1042, initial)>
running thread  <Thread(Thread-1043, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.90it/s]


current loss: [[5.96599635e+53]]
running thread  <Thread(Thread-1044, initial)>
running thread  <Thread(Thread-1045, initial)>
running thread  <Thread(Thread-1046, initial)>
running thread  <Thread(Thread-1047, initial)>
running thread  <Thread(Thread-1048, initial)>
running thread  <Thread(Thread-1049, initial)>
running thread  <Thread(Thread-1050, initial)>
running thread  <Thread(Thread-1051, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.29it/s]


current loss: [[1.13231134e+50]]
running thread  <Thread(Thread-1052, initial)>
running thread  <Thread(Thread-1053, initial)>
running thread  <Thread(Thread-1054, initial)>
running thread  <Thread(Thread-1055, initial)>
running thread  <Thread(Thread-1056, initial)>
running thread  <Thread(Thread-1057, initial)>
running thread  <Thread(Thread-1058, initial)>
running thread  <Thread(Thread-1059, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.17it/s]


current loss: [[3.58305193e+169]]
running thread  <Thread(Thread-1060, initial)>
running thread  <Thread(Thread-1061, initial)>
running thread  <Thread(Thread-1062, initial)>
running thread  <Thread(Thread-1063, initial)>
running thread  <Thread(Thread-1064, initial)>
running thread  <Thread(Thread-1065, initial)>
running thread  <Thread(Thread-1066, initial)>
running thread  <Thread(Thread-1067, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 13.05it/s]


current loss: [[5.03782626e+183]]
running thread  <Thread(Thread-1068, initial)>
running thread  <Thread(Thread-1069, initial)>
running thread  <Thread(Thread-1070, initial)>
running thread  <Thread(Thread-1071, initial)>
running thread  <Thread(Thread-1072, initial)>
running thread  <Thread(Thread-1073, initial)>
running thread  <Thread(Thread-1074, initial)>
running thread  <Thread(Thread-1075, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.50it/s]


current loss: [[6.86298104e+43]]
running thread  <Thread(Thread-1076, initial)>
running thread  <Thread(Thread-1077, initial)>
running thread  <Thread(Thread-1078, initial)>
running thread  <Thread(Thread-1079, initial)>
running thread  <Thread(Thread-1080, initial)>
running thread  <Thread(Thread-1081, initial)>
running thread  <Thread(Thread-1082, initial)>
running thread  <Thread(Thread-1083, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.83it/s]


current loss: [[8.46800574e+92]]
running thread  <Thread(Thread-1084, initial)>
running thread  <Thread(Thread-1085, initial)>
running thread  <Thread(Thread-1086, initial)>
running thread  <Thread(Thread-1087, initial)>
running thread  <Thread(Thread-1088, initial)>
running thread  <Thread(Thread-1089, initial)>
running thread  <Thread(Thread-1090, initial)>
running thread  <Thread(Thread-1091, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.37it/s]


current loss: [[4.28721055e+26]]
running thread  <Thread(Thread-1092, initial)>
running thread  <Thread(Thread-1093, initial)>
running thread  <Thread(Thread-1094, initial)>
running thread  <Thread(Thread-1095, initial)>
running thread  <Thread(Thread-1096, initial)>
running thread  <Thread(Thread-1097, initial)>
running thread  <Thread(Thread-1098, initial)>
running thread  <Thread(Thread-1099, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.58it/s]


current loss: [[6.61790597e+203]]
running thread  <Thread(Thread-1100, initial)>
running thread  <Thread(Thread-1101, initial)>
running thread  <Thread(Thread-1102, initial)>
running thread  <Thread(Thread-1103, initial)>
running thread  <Thread(Thread-1104, initial)>
running thread  <Thread(Thread-1105, initial)>
running thread  <Thread(Thread-1106, initial)>
running thread  <Thread(Thread-1107, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.40it/s]


current loss: [[1.06751699e+127]]
running thread  <Thread(Thread-1108, initial)>
running thread  <Thread(Thread-1109, initial)>
running thread  <Thread(Thread-1110, initial)>
running thread  <Thread(Thread-1111, initial)>
running thread  <Thread(Thread-1112, initial)>
running thread  <Thread(Thread-1113, initial)>
running thread  <Thread(Thread-1114, initial)>
running thread  <Thread(Thread-1115, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.17it/s]


current loss: [[6.41435631e+277]]
running thread  <Thread(Thread-1116, initial)>
running thread  <Thread(Thread-1117, initial)>
running thread  <Thread(Thread-1118, initial)>
running thread  <Thread(Thread-1119, initial)>
running thread  <Thread(Thread-1120, initial)>
running thread  <Thread(Thread-1121, initial)>
running thread  <Thread(Thread-1122, initial)>
running thread  <Thread(Thread-1123, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.86it/s]


current loss: [[2.89013397e+242]]
running thread  <Thread(Thread-1124, initial)>
running thread  <Thread(Thread-1125, initial)>
running thread  <Thread(Thread-1126, initial)>
running thread  <Thread(Thread-1127, initial)>
running thread  <Thread(Thread-1128, initial)>
running thread  <Thread(Thread-1129, initial)>
running thread  <Thread(Thread-1130, initial)>
running thread  <Thread(Thread-1131, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.43it/s]


current loss: [[1.02279406e+32]]
running thread  <Thread(Thread-1132, initial)>
running thread  <Thread(Thread-1133, initial)>
running thread  <Thread(Thread-1134, initial)>
running thread  <Thread(Thread-1135, initial)>
running thread  <Thread(Thread-1136, initial)>
running thread  <Thread(Thread-1137, initial)>
running thread  <Thread(Thread-1138, initial)>
running thread  <Thread(Thread-1139, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.53it/s]


current loss: [[3.65949379e+84]]
running thread  <Thread(Thread-1140, initial)>
running thread  <Thread(Thread-1141, initial)>
running thread  <Thread(Thread-1142, initial)>
running thread  <Thread(Thread-1143, initial)>
running thread  <Thread(Thread-1144, initial)>
running thread  <Thread(Thread-1145, initial)>
running thread  <Thread(Thread-1146, initial)>
running thread  <Thread(Thread-1147, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 15.13it/s]


current loss: [[6.74155804e+84]]
running thread  <Thread(Thread-1148, initial)>
running thread  <Thread(Thread-1149, initial)>
running thread  <Thread(Thread-1150, initial)>
running thread  <Thread(Thread-1151, initial)>
running thread  <Thread(Thread-1152, initial)>
running thread  <Thread(Thread-1153, initial)>
running thread  <Thread(Thread-1154, initial)>
running thread  <Thread(Thread-1155, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.41it/s]


current loss: [[46.27600566]]
running thread  <Thread(Thread-1156, initial)>
running thread  <Thread(Thread-1157, initial)>
running thread  <Thread(Thread-1158, initial)>
running thread  <Thread(Thread-1159, initial)>
running thread  <Thread(Thread-1160, initial)>
running thread  <Thread(Thread-1161, initial)>
running thread  <Thread(Thread-1162, initial)>
running thread  <Thread(Thread-1163, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 15.40it/s]


current loss: [[3.07792991e+111]]
running thread  <Thread(Thread-1164, initial)>
running thread  <Thread(Thread-1165, initial)>
running thread  <Thread(Thread-1166, initial)>
running thread  <Thread(Thread-1167, initial)>
running thread  <Thread(Thread-1168, initial)>
running thread  <Thread(Thread-1169, initial)>
running thread  <Thread(Thread-1170, initial)>
running thread  <Thread(Thread-1171, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.37it/s]


current loss: [[3.6289028e+106]]
running thread  <Thread(Thread-1172, initial)>
running thread  <Thread(Thread-1173, initial)>
running thread  <Thread(Thread-1174, initial)>
running thread  <Thread(Thread-1175, initial)>
running thread  <Thread(Thread-1176, initial)>
running thread  <Thread(Thread-1177, initial)>
running thread  <Thread(Thread-1178, initial)>
running thread  <Thread(Thread-1179, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.58it/s]


current loss: [[4.01907903e+249]]
running thread  <Thread(Thread-1180, initial)>
running thread  <Thread(Thread-1181, initial)>
running thread  <Thread(Thread-1182, initial)>
running thread  <Thread(Thread-1183, initial)>
running thread  <Thread(Thread-1184, initial)>
running thread  <Thread(Thread-1185, initial)>
running thread  <Thread(Thread-1186, initial)>
running thread  <Thread(Thread-1187, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.87it/s]


current loss: [[2.67943462e+62]]
running thread  <Thread(Thread-1188, initial)>
running thread  <Thread(Thread-1189, initial)>
running thread  <Thread(Thread-1190, initial)>
running thread  <Thread(Thread-1191, initial)>
running thread  <Thread(Thread-1192, initial)>
running thread  <Thread(Thread-1193, initial)>
running thread  <Thread(Thread-1194, initial)>
running thread  <Thread(Thread-1195, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.04it/s]


current loss: [[nan]]
running thread  <Thread(Thread-1196, initial)>
running thread  <Thread(Thread-1197, initial)>
running thread  <Thread(Thread-1198, initial)>
running thread  <Thread(Thread-1199, initial)>
running thread  <Thread(Thread-1200, initial)>
running thread  <Thread(Thread-1201, initial)>
running thread  <Thread(Thread-1202, initial)>
running thread  <Thread(Thread-1203, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.35it/s]


current loss: [[nan]]
running thread  <Thread(Thread-1204, initial)>
running thread  <Thread(Thread-1205, initial)>
running thread  <Thread(Thread-1206, initial)>
running thread  <Thread(Thread-1207, initial)>
running thread  <Thread(Thread-1208, initial)>
running thread  <Thread(Thread-1209, initial)>
running thread  <Thread(Thread-1210, initial)>
running thread  <Thread(Thread-1211, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.71it/s]


current loss: [[2.65831675e+184]]
running thread  <Thread(Thread-1212, initial)>
running thread  <Thread(Thread-1213, initial)>
running thread  <Thread(Thread-1214, initial)>
running thread  <Thread(Thread-1215, initial)>
running thread  <Thread(Thread-1216, initial)>
running thread  <Thread(Thread-1217, initial)>
running thread  <Thread(Thread-1218, initial)>
running thread  <Thread(Thread-1219, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.54it/s]


current loss: [[5.62602315e+208]]
running thread  <Thread(Thread-1220, initial)>
running thread  <Thread(Thread-1221, initial)>
running thread  <Thread(Thread-1222, initial)>
running thread  <Thread(Thread-1223, initial)>
running thread  <Thread(Thread-1224, initial)>
running thread  <Thread(Thread-1225, initial)>
running thread  <Thread(Thread-1226, initial)>
running thread  <Thread(Thread-1227, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.55it/s]


current loss: [[45.19676763]]
running thread  <Thread(Thread-1228, initial)>
running thread  <Thread(Thread-1229, initial)>
running thread  <Thread(Thread-1230, initial)>
running thread  <Thread(Thread-1231, initial)>
running thread  <Thread(Thread-1232, initial)>
running thread  <Thread(Thread-1233, initial)>
running thread  <Thread(Thread-1234, initial)>
running thread  <Thread(Thread-1235, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.21it/s]


current loss: [[43.38195396]]
running thread  <Thread(Thread-1236, initial)>
running thread  <Thread(Thread-1237, initial)>
running thread  <Thread(Thread-1238, initial)>
running thread  <Thread(Thread-1239, initial)>
running thread  <Thread(Thread-1240, initial)>
running thread  <Thread(Thread-1241, initial)>
running thread  <Thread(Thread-1242, initial)>
running thread  <Thread(Thread-1243, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 13.19it/s]


current loss: [[2.75791247e+219]]
running thread  <Thread(Thread-1244, initial)>
running thread  <Thread(Thread-1245, initial)>
running thread  <Thread(Thread-1246, initial)>
running thread  <Thread(Thread-1247, initial)>
running thread  <Thread(Thread-1248, initial)>
running thread  <Thread(Thread-1249, initial)>
running thread  <Thread(Thread-1250, initial)>
running thread  <Thread(Thread-1251, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.94it/s]


current loss: [[3.18625073e+293]]
running thread  <Thread(Thread-1252, initial)>
running thread  <Thread(Thread-1253, initial)>
running thread  <Thread(Thread-1254, initial)>
running thread  <Thread(Thread-1255, initial)>
running thread  <Thread(Thread-1256, initial)>
running thread  <Thread(Thread-1257, initial)>
running thread  <Thread(Thread-1258, initial)>
running thread  <Thread(Thread-1259, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.41it/s]


current loss: [[49.92857311]]
running thread  <Thread(Thread-1260, initial)>
running thread  <Thread(Thread-1261, initial)>
running thread  <Thread(Thread-1262, initial)>
running thread  <Thread(Thread-1263, initial)>
running thread  <Thread(Thread-1264, initial)>
running thread  <Thread(Thread-1265, initial)>
running thread  <Thread(Thread-1266, initial)>
running thread  <Thread(Thread-1267, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.10it/s]


current loss: [[6.31760893e+167]]
running thread  <Thread(Thread-1268, initial)>
running thread  <Thread(Thread-1269, initial)>
running thread  <Thread(Thread-1270, initial)>
running thread  <Thread(Thread-1271, initial)>
running thread  <Thread(Thread-1272, initial)>
running thread  <Thread(Thread-1273, initial)>
running thread  <Thread(Thread-1274, initial)>
running thread  <Thread(Thread-1275, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.58it/s]


current loss: [[3.78088823e+76]]
running thread  <Thread(Thread-1276, initial)>
running thread  <Thread(Thread-1277, initial)>
running thread  <Thread(Thread-1278, initial)>
running thread  <Thread(Thread-1279, initial)>
running thread  <Thread(Thread-1280, initial)>
running thread  <Thread(Thread-1281, initial)>
running thread  <Thread(Thread-1282, initial)>
running thread  <Thread(Thread-1283, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 15.59it/s]


current loss: [[3.68050624e+165]]
running thread  <Thread(Thread-1284, initial)>
running thread  <Thread(Thread-1285, initial)>
running thread  <Thread(Thread-1286, initial)>
running thread  <Thread(Thread-1287, initial)>
running thread  <Thread(Thread-1288, initial)>
running thread  <Thread(Thread-1289, initial)>
running thread  <Thread(Thread-1290, initial)>
running thread  <Thread(Thread-1291, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 11.37it/s]


current loss: [[3.74858058e+121]]
running thread  <Thread(Thread-1292, initial)>
running thread  <Thread(Thread-1293, initial)>
running thread  <Thread(Thread-1294, initial)>
running thread  <Thread(Thread-1295, initial)>
running thread  <Thread(Thread-1296, initial)>
running thread  <Thread(Thread-1297, initial)>
running thread  <Thread(Thread-1298, initial)>
running thread  <Thread(Thread-1299, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 11.21it/s]


current loss: [[4.61361082e+62]]
running thread  <Thread(Thread-1300, initial)>
running thread  <Thread(Thread-1301, initial)>
running thread  <Thread(Thread-1302, initial)>
running thread  <Thread(Thread-1303, initial)>
running thread  <Thread(Thread-1304, initial)>
running thread  <Thread(Thread-1305, initial)>
running thread  <Thread(Thread-1306, initial)>
running thread  <Thread(Thread-1307, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.77it/s]


current loss: [[44.1975531]]
running thread  <Thread(Thread-1308, initial)>
running thread  <Thread(Thread-1309, initial)>
running thread  <Thread(Thread-1310, initial)>
running thread  <Thread(Thread-1311, initial)>
running thread  <Thread(Thread-1312, initial)>
running thread  <Thread(Thread-1313, initial)>
running thread  <Thread(Thread-1314, initial)>
running thread  <Thread(Thread-1315, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.05it/s]


current loss: [[2.39589073e+79]]
running thread  <Thread(Thread-1316, initial)>
running thread  <Thread(Thread-1317, initial)>
running thread  <Thread(Thread-1318, initial)>
running thread  <Thread(Thread-1319, initial)>
running thread  <Thread(Thread-1320, initial)>
running thread  <Thread(Thread-1321, initial)>
running thread  <Thread(Thread-1322, initial)>
running thread  <Thread(Thread-1323, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.50it/s]


current loss: [[48.37797939]]
running thread  <Thread(Thread-1324, initial)>
running thread  <Thread(Thread-1325, initial)>
running thread  <Thread(Thread-1326, initial)>
running thread  <Thread(Thread-1327, initial)>
running thread  <Thread(Thread-1328, initial)>
running thread  <Thread(Thread-1329, initial)>
running thread  <Thread(Thread-1330, initial)>
running thread  <Thread(Thread-1331, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.73it/s]


current loss: [[2.40193333e+56]]
running thread  <Thread(Thread-1332, initial)>
running thread  <Thread(Thread-1333, initial)>
running thread  <Thread(Thread-1334, initial)>
running thread  <Thread(Thread-1335, initial)>
running thread  <Thread(Thread-1336, initial)>
running thread  <Thread(Thread-1337, initial)>
running thread  <Thread(Thread-1338, initial)>
running thread  <Thread(Thread-1339, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 12.86it/s]


current loss: [[1.65357003e+95]]
running thread  <Thread(Thread-1340, initial)>
running thread  <Thread(Thread-1341, initial)>
running thread  <Thread(Thread-1342, initial)>
running thread  <Thread(Thread-1343, initial)>
running thread  <Thread(Thread-1344, initial)>
running thread  <Thread(Thread-1345, initial)>
running thread  <Thread(Thread-1346, initial)>
running thread  <Thread(Thread-1347, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 15.70it/s]


current loss: [[4.7146331e+175]]
running thread  <Thread(Thread-1348, initial)>
running thread  <Thread(Thread-1349, initial)>
running thread  <Thread(Thread-1350, initial)>
running thread  <Thread(Thread-1351, initial)>
running thread  <Thread(Thread-1352, initial)>
running thread  <Thread(Thread-1353, initial)>
running thread  <Thread(Thread-1354, initial)>
running thread  <Thread(Thread-1355, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.15it/s]


current loss: [[47.54279603]]
running thread  <Thread(Thread-1356, initial)>
running thread  <Thread(Thread-1357, initial)>
running thread  <Thread(Thread-1358, initial)>
running thread  <Thread(Thread-1359, initial)>
running thread  <Thread(Thread-1360, initial)>
running thread  <Thread(Thread-1361, initial)>
running thread  <Thread(Thread-1362, initial)>
running thread  <Thread(Thread-1363, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.93it/s]


current loss: [[3.10299469e+174]]
running thread  <Thread(Thread-1364, initial)>
running thread  <Thread(Thread-1365, initial)>
running thread  <Thread(Thread-1366, initial)>
running thread  <Thread(Thread-1367, initial)>
running thread  <Thread(Thread-1368, initial)>
running thread  <Thread(Thread-1369, initial)>
running thread  <Thread(Thread-1370, initial)>
running thread  <Thread(Thread-1371, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.31it/s]


current loss: [[5.16136533e+42]]
running thread  <Thread(Thread-1372, initial)>
running thread  <Thread(Thread-1373, initial)>
running thread  <Thread(Thread-1374, initial)>
running thread  <Thread(Thread-1375, initial)>
running thread  <Thread(Thread-1376, initial)>
running thread  <Thread(Thread-1377, initial)>
running thread  <Thread(Thread-1378, initial)>
running thread  <Thread(Thread-1379, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.48it/s]


current loss: [[5.35534488e+129]]
running thread  <Thread(Thread-1380, initial)>
running thread  <Thread(Thread-1381, initial)>
running thread  <Thread(Thread-1382, initial)>
running thread  <Thread(Thread-1383, initial)>
running thread  <Thread(Thread-1384, initial)>
running thread  <Thread(Thread-1385, initial)>
running thread  <Thread(Thread-1386, initial)>
running thread  <Thread(Thread-1387, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.23it/s]


current loss: [[49.90052634]]
running thread  <Thread(Thread-1388, initial)>
running thread  <Thread(Thread-1389, initial)>
running thread  <Thread(Thread-1390, initial)>
running thread  <Thread(Thread-1391, initial)>
running thread  <Thread(Thread-1392, initial)>
running thread  <Thread(Thread-1393, initial)>
running thread  <Thread(Thread-1394, initial)>
running thread  <Thread(Thread-1395, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.11it/s]


current loss: [[nan]]
running thread  <Thread(Thread-1396, initial)>
running thread  <Thread(Thread-1397, initial)>
running thread  <Thread(Thread-1398, initial)>
running thread  <Thread(Thread-1399, initial)>
running thread  <Thread(Thread-1400, initial)>
running thread  <Thread(Thread-1401, initial)>
running thread  <Thread(Thread-1402, initial)>
running thread  <Thread(Thread-1403, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.52it/s]


current loss: [[3.0735033e+109]]
running thread  <Thread(Thread-1404, initial)>
running thread  <Thread(Thread-1405, initial)>
running thread  <Thread(Thread-1406, initial)>
running thread  <Thread(Thread-1407, initial)>
running thread  <Thread(Thread-1408, initial)>
running thread  <Thread(Thread-1409, initial)>
running thread  <Thread(Thread-1410, initial)>
running thread  <Thread(Thread-1411, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.25it/s]


current loss: [[1.31288836e+73]]
running thread  <Thread(Thread-1412, initial)>
running thread  <Thread(Thread-1413, initial)>
running thread  <Thread(Thread-1414, initial)>
running thread  <Thread(Thread-1415, initial)>
running thread  <Thread(Thread-1416, initial)>
running thread  <Thread(Thread-1417, initial)>
running thread  <Thread(Thread-1418, initial)>
running thread  <Thread(Thread-1419, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.42it/s]


current loss: [[47.47170419]]
running thread  <Thread(Thread-1420, initial)>
running thread  <Thread(Thread-1421, initial)>
running thread  <Thread(Thread-1422, initial)>
running thread  <Thread(Thread-1423, initial)>
running thread  <Thread(Thread-1424, initial)>
running thread  <Thread(Thread-1425, initial)>
running thread  <Thread(Thread-1426, initial)>
running thread  <Thread(Thread-1427, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 13.22it/s]


current loss: [[1.70592082e+63]]
running thread  <Thread(Thread-1428, initial)>
running thread  <Thread(Thread-1429, initial)>
running thread  <Thread(Thread-1430, initial)>
running thread  <Thread(Thread-1431, initial)>
running thread  <Thread(Thread-1432, initial)>
running thread  <Thread(Thread-1433, initial)>
running thread  <Thread(Thread-1434, initial)>
running thread  <Thread(Thread-1435, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.59it/s]


current loss: [[7.56284249e+147]]
running thread  <Thread(Thread-1436, initial)>
running thread  <Thread(Thread-1437, initial)>
running thread  <Thread(Thread-1438, initial)>
running thread  <Thread(Thread-1439, initial)>
running thread  <Thread(Thread-1440, initial)>
running thread  <Thread(Thread-1441, initial)>
running thread  <Thread(Thread-1442, initial)>
running thread  <Thread(Thread-1443, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.52it/s]


current loss: [[49.90130989]]
running thread  <Thread(Thread-1444, initial)>
running thread  <Thread(Thread-1445, initial)>
running thread  <Thread(Thread-1446, initial)>
running thread  <Thread(Thread-1447, initial)>
running thread  <Thread(Thread-1448, initial)>
running thread  <Thread(Thread-1449, initial)>
running thread  <Thread(Thread-1450, initial)>
running thread  <Thread(Thread-1451, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.54it/s]


current loss: [[1.37943582e+163]]
running thread  <Thread(Thread-1452, initial)>
running thread  <Thread(Thread-1453, initial)>
running thread  <Thread(Thread-1454, initial)>
running thread  <Thread(Thread-1455, initial)>
running thread  <Thread(Thread-1456, initial)>
running thread  <Thread(Thread-1457, initial)>
running thread  <Thread(Thread-1458, initial)>
running thread  <Thread(Thread-1459, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.47it/s]


current loss: [[nan]]
running thread  <Thread(Thread-1460, initial)>
running thread  <Thread(Thread-1461, initial)>
running thread  <Thread(Thread-1462, initial)>
running thread  <Thread(Thread-1463, initial)>
running thread  <Thread(Thread-1464, initial)>
running thread  <Thread(Thread-1465, initial)>
running thread  <Thread(Thread-1466, initial)>
running thread  <Thread(Thread-1467, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.47it/s]


current loss: [[45.03409775]]
running thread  <Thread(Thread-1468, initial)>
running thread  <Thread(Thread-1469, initial)>
running thread  <Thread(Thread-1470, initial)>
running thread  <Thread(Thread-1471, initial)>
running thread  <Thread(Thread-1472, initial)>
running thread  <Thread(Thread-1473, initial)>
running thread  <Thread(Thread-1474, initial)>
running thread  <Thread(Thread-1475, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.47it/s]


current loss: [[1.79843914e+214]]
running thread  <Thread(Thread-1476, initial)>
running thread  <Thread(Thread-1477, initial)>
running thread  <Thread(Thread-1478, initial)>
running thread  <Thread(Thread-1479, initial)>
running thread  <Thread(Thread-1480, initial)>
running thread  <Thread(Thread-1481, initial)>
running thread  <Thread(Thread-1482, initial)>
running thread  <Thread(Thread-1483, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 15.91it/s]


current loss: [[5.62074626e+36]]
running thread  <Thread(Thread-1484, initial)>
running thread  <Thread(Thread-1485, initial)>
running thread  <Thread(Thread-1486, initial)>
running thread  <Thread(Thread-1487, initial)>
running thread  <Thread(Thread-1488, initial)>
running thread  <Thread(Thread-1489, initial)>
running thread  <Thread(Thread-1490, initial)>
running thread  <Thread(Thread-1491, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.26it/s]


current loss: [[nan]]
running thread  <Thread(Thread-1492, initial)>
running thread  <Thread(Thread-1493, initial)>
running thread  <Thread(Thread-1494, initial)>
running thread  <Thread(Thread-1495, initial)>
running thread  <Thread(Thread-1496, initial)>
running thread  <Thread(Thread-1497, initial)>
running thread  <Thread(Thread-1498, initial)>
running thread  <Thread(Thread-1499, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.81it/s]


current loss: [[nan]]
running thread  <Thread(Thread-1500, initial)>
running thread  <Thread(Thread-1501, initial)>
running thread  <Thread(Thread-1502, initial)>
running thread  <Thread(Thread-1503, initial)>
running thread  <Thread(Thread-1504, initial)>
running thread  <Thread(Thread-1505, initial)>
running thread  <Thread(Thread-1506, initial)>
running thread  <Thread(Thread-1507, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.98it/s]


current loss: [[49.78194852]]
running thread  <Thread(Thread-1508, initial)>
running thread  <Thread(Thread-1509, initial)>
running thread  <Thread(Thread-1510, initial)>
running thread  <Thread(Thread-1511, initial)>
running thread  <Thread(Thread-1512, initial)>
running thread  <Thread(Thread-1513, initial)>
running thread  <Thread(Thread-1514, initial)>
running thread  <Thread(Thread-1515, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.79it/s]


current loss: [[46.27248502]]
running thread  <Thread(Thread-1516, initial)>
running thread  <Thread(Thread-1517, initial)>
running thread  <Thread(Thread-1518, initial)>
running thread  <Thread(Thread-1519, initial)>
running thread  <Thread(Thread-1520, initial)>
running thread  <Thread(Thread-1521, initial)>
running thread  <Thread(Thread-1522, initial)>
running thread  <Thread(Thread-1523, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.27it/s]


current loss: [[2.75054913e+175]]
running thread  <Thread(Thread-1524, initial)>
running thread  <Thread(Thread-1525, initial)>
running thread  <Thread(Thread-1526, initial)>
running thread  <Thread(Thread-1527, initial)>
running thread  <Thread(Thread-1528, initial)>
running thread  <Thread(Thread-1529, initial)>
running thread  <Thread(Thread-1530, initial)>
running thread  <Thread(Thread-1531, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.40it/s]


current loss: [[7.06514755e+122]]
running thread  <Thread(Thread-1532, initial)>
running thread  <Thread(Thread-1533, initial)>
running thread  <Thread(Thread-1534, initial)>
running thread  <Thread(Thread-1535, initial)>
running thread  <Thread(Thread-1536, initial)>
running thread  <Thread(Thread-1537, initial)>
running thread  <Thread(Thread-1538, initial)>
running thread  <Thread(Thread-1539, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.07it/s]


current loss: [[49.90130989]]
running thread  <Thread(Thread-1540, initial)>
running thread  <Thread(Thread-1541, initial)>
running thread  <Thread(Thread-1542, initial)>
running thread  <Thread(Thread-1543, initial)>
running thread  <Thread(Thread-1544, initial)>
running thread  <Thread(Thread-1545, initial)>
running thread  <Thread(Thread-1546, initial)>
running thread  <Thread(Thread-1547, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.66it/s]


current loss: [[8.72600814e+274]]
running thread  <Thread(Thread-1548, initial)>
running thread  <Thread(Thread-1549, initial)>
running thread  <Thread(Thread-1550, initial)>
running thread  <Thread(Thread-1551, initial)>
running thread  <Thread(Thread-1552, initial)>
running thread  <Thread(Thread-1553, initial)>
running thread  <Thread(Thread-1554, initial)>
running thread  <Thread(Thread-1555, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.71it/s]


current loss: [[1.73190156e+262]]
running thread  <Thread(Thread-1556, initial)>
running thread  <Thread(Thread-1557, initial)>
running thread  <Thread(Thread-1558, initial)>
running thread  <Thread(Thread-1559, initial)>
running thread  <Thread(Thread-1560, initial)>
running thread  <Thread(Thread-1561, initial)>
running thread  <Thread(Thread-1562, initial)>
running thread  <Thread(Thread-1563, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.53it/s]


current loss: [[2.75130112e+289]]
running thread  <Thread(Thread-1564, initial)>
running thread  <Thread(Thread-1565, initial)>
running thread  <Thread(Thread-1566, initial)>
running thread  <Thread(Thread-1567, initial)>
running thread  <Thread(Thread-1568, initial)>
running thread  <Thread(Thread-1569, initial)>
running thread  <Thread(Thread-1570, initial)>
running thread  <Thread(Thread-1571, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.22it/s]


current loss: [[49.76483237]]
running thread  <Thread(Thread-1572, initial)>
running thread  <Thread(Thread-1573, initial)>
running thread  <Thread(Thread-1574, initial)>
running thread  <Thread(Thread-1575, initial)>
running thread  <Thread(Thread-1576, initial)>
running thread  <Thread(Thread-1577, initial)>
running thread  <Thread(Thread-1578, initial)>
running thread  <Thread(Thread-1579, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.45it/s]


current loss: [[5.43154197e+280]]
running thread  <Thread(Thread-1580, initial)>
running thread  <Thread(Thread-1581, initial)>
running thread  <Thread(Thread-1582, initial)>
running thread  <Thread(Thread-1583, initial)>
running thread  <Thread(Thread-1584, initial)>
running thread  <Thread(Thread-1585, initial)>
running thread  <Thread(Thread-1586, initial)>
running thread  <Thread(Thread-1587, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.20it/s]


current loss: [[8.22167586e+230]]
running thread  <Thread(Thread-1588, initial)>
running thread  <Thread(Thread-1589, initial)>
running thread  <Thread(Thread-1590, initial)>
running thread  <Thread(Thread-1591, initial)>
running thread  <Thread(Thread-1592, initial)>
running thread  <Thread(Thread-1593, initial)>
running thread  <Thread(Thread-1594, initial)>
running thread  <Thread(Thread-1595, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.81it/s]


current loss: [[44.75546019]]
running thread  <Thread(Thread-1596, initial)>
running thread  <Thread(Thread-1597, initial)>
running thread  <Thread(Thread-1598, initial)>
running thread  <Thread(Thread-1599, initial)>
running thread  <Thread(Thread-1600, initial)>
running thread  <Thread(Thread-1601, initial)>
running thread  <Thread(Thread-1602, initial)>
running thread  <Thread(Thread-1603, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 13.22it/s]


current loss: [[50.67081582]]
running thread  <Thread(Thread-1604, initial)>
running thread  <Thread(Thread-1605, initial)>
running thread  <Thread(Thread-1606, initial)>
running thread  <Thread(Thread-1607, initial)>
running thread  <Thread(Thread-1608, initial)>
running thread  <Thread(Thread-1609, initial)>
running thread  <Thread(Thread-1610, initial)>
running thread  <Thread(Thread-1611, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.83it/s]


current loss: [[49.12834663]]
running thread  <Thread(Thread-1612, initial)>
running thread  <Thread(Thread-1613, initial)>
running thread  <Thread(Thread-1614, initial)>
running thread  <Thread(Thread-1615, initial)>
running thread  <Thread(Thread-1616, initial)>
running thread  <Thread(Thread-1617, initial)>
running thread  <Thread(Thread-1618, initial)>
running thread  <Thread(Thread-1619, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.56it/s]


current loss: [[nan]]
running thread  <Thread(Thread-1620, initial)>
running thread  <Thread(Thread-1621, initial)>
running thread  <Thread(Thread-1622, initial)>
running thread  <Thread(Thread-1623, initial)>
running thread  <Thread(Thread-1624, initial)>
running thread  <Thread(Thread-1625, initial)>
running thread  <Thread(Thread-1626, initial)>
running thread  <Thread(Thread-1627, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 15.82it/s]


current loss: [[49.90130989]]
running thread  <Thread(Thread-1628, initial)>
running thread  <Thread(Thread-1629, initial)>
running thread  <Thread(Thread-1630, initial)>
running thread  <Thread(Thread-1631, initial)>
running thread  <Thread(Thread-1632, initial)>
running thread  <Thread(Thread-1633, initial)>
running thread  <Thread(Thread-1634, initial)>
running thread  <Thread(Thread-1635, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.58it/s]


current loss: [[3.69712409e+99]]
running thread  <Thread(Thread-1636, initial)>
running thread  <Thread(Thread-1637, initial)>
running thread  <Thread(Thread-1638, initial)>
running thread  <Thread(Thread-1639, initial)>
running thread  <Thread(Thread-1640, initial)>
running thread  <Thread(Thread-1641, initial)>
running thread  <Thread(Thread-1642, initial)>
running thread  <Thread(Thread-1643, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 15.70it/s]


current loss: [[nan]]
running thread  <Thread(Thread-1644, initial)>
running thread  <Thread(Thread-1645, initial)>
running thread  <Thread(Thread-1646, initial)>
running thread  <Thread(Thread-1647, initial)>
running thread  <Thread(Thread-1648, initial)>
running thread  <Thread(Thread-1649, initial)>
running thread  <Thread(Thread-1650, initial)>
running thread  <Thread(Thread-1651, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 13.05it/s]


current loss: [[48.71036895]]
running thread  <Thread(Thread-1652, initial)>
running thread  <Thread(Thread-1653, initial)>
running thread  <Thread(Thread-1654, initial)>
running thread  <Thread(Thread-1655, initial)>
running thread  <Thread(Thread-1656, initial)>
running thread  <Thread(Thread-1657, initial)>
running thread  <Thread(Thread-1658, initial)>
running thread  <Thread(Thread-1659, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 15.31it/s]


current loss: [[6.21418623e+135]]
running thread  <Thread(Thread-1660, initial)>
running thread  <Thread(Thread-1661, initial)>
running thread  <Thread(Thread-1662, initial)>
running thread  <Thread(Thread-1663, initial)>
running thread  <Thread(Thread-1664, initial)>
running thread  <Thread(Thread-1665, initial)>
running thread  <Thread(Thread-1666, initial)>
running thread  <Thread(Thread-1667, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.96it/s]


current loss: [[nan]]
running thread  <Thread(Thread-1668, initial)>
running thread  <Thread(Thread-1669, initial)>
running thread  <Thread(Thread-1670, initial)>
running thread  <Thread(Thread-1671, initial)>
running thread  <Thread(Thread-1672, initial)>
running thread  <Thread(Thread-1673, initial)>
running thread  <Thread(Thread-1674, initial)>
running thread  <Thread(Thread-1675, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.39it/s]


current loss: [[nan]]
running thread  <Thread(Thread-1676, initial)>
running thread  <Thread(Thread-1677, initial)>
running thread  <Thread(Thread-1678, initial)>
running thread  <Thread(Thread-1679, initial)>
running thread  <Thread(Thread-1680, initial)>
running thread  <Thread(Thread-1681, initial)>
running thread  <Thread(Thread-1682, initial)>
running thread  <Thread(Thread-1683, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.57it/s]


current loss: [[48.91296695]]
running thread  <Thread(Thread-1684, initial)>
running thread  <Thread(Thread-1685, initial)>
running thread  <Thread(Thread-1686, initial)>
running thread  <Thread(Thread-1687, initial)>
running thread  <Thread(Thread-1688, initial)>
running thread  <Thread(Thread-1689, initial)>
running thread  <Thread(Thread-1690, initial)>
running thread  <Thread(Thread-1691, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.80it/s]


current loss: [[46.35718152]]
running thread  <Thread(Thread-1692, initial)>
running thread  <Thread(Thread-1693, initial)>
running thread  <Thread(Thread-1694, initial)>
running thread  <Thread(Thread-1695, initial)>
running thread  <Thread(Thread-1696, initial)>
running thread  <Thread(Thread-1697, initial)>
running thread  <Thread(Thread-1698, initial)>
running thread  <Thread(Thread-1699, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 13.33it/s]


current loss: [[nan]]
running thread  <Thread(Thread-1700, initial)>
running thread  <Thread(Thread-1701, initial)>
running thread  <Thread(Thread-1702, initial)>
running thread  <Thread(Thread-1703, initial)>
running thread  <Thread(Thread-1704, initial)>
running thread  <Thread(Thread-1705, initial)>
running thread  <Thread(Thread-1706, initial)>
running thread  <Thread(Thread-1707, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.53it/s]


current loss: [[4.78338679e+260]]
running thread  <Thread(Thread-1708, initial)>
running thread  <Thread(Thread-1709, initial)>
running thread  <Thread(Thread-1710, initial)>
running thread  <Thread(Thread-1711, initial)>
running thread  <Thread(Thread-1712, initial)>
running thread  <Thread(Thread-1713, initial)>
running thread  <Thread(Thread-1714, initial)>
running thread  <Thread(Thread-1715, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.74it/s]


current loss: [[nan]]
running thread  <Thread(Thread-1716, initial)>
running thread  <Thread(Thread-1717, initial)>
running thread  <Thread(Thread-1718, initial)>
running thread  <Thread(Thread-1719, initial)>
running thread  <Thread(Thread-1720, initial)>
running thread  <Thread(Thread-1721, initial)>
running thread  <Thread(Thread-1722, initial)>
running thread  <Thread(Thread-1723, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.23it/s]


current loss: [[48.48168272]]
running thread  <Thread(Thread-1724, initial)>
running thread  <Thread(Thread-1725, initial)>
running thread  <Thread(Thread-1726, initial)>
running thread  <Thread(Thread-1727, initial)>
running thread  <Thread(Thread-1728, initial)>
running thread  <Thread(Thread-1729, initial)>
running thread  <Thread(Thread-1730, initial)>
running thread  <Thread(Thread-1731, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.00it/s]


current loss: [[nan]]
running thread  <Thread(Thread-1732, initial)>
running thread  <Thread(Thread-1733, initial)>
running thread  <Thread(Thread-1734, initial)>
running thread  <Thread(Thread-1735, initial)>
running thread  <Thread(Thread-1736, initial)>
running thread  <Thread(Thread-1737, initial)>
running thread  <Thread(Thread-1738, initial)>
running thread  <Thread(Thread-1739, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 12.82it/s]


current loss: [[4.98072978e+303]]
running thread  <Thread(Thread-1740, initial)>
running thread  <Thread(Thread-1741, initial)>
running thread  <Thread(Thread-1742, initial)>
running thread  <Thread(Thread-1743, initial)>
running thread  <Thread(Thread-1744, initial)>
running thread  <Thread(Thread-1745, initial)>
running thread  <Thread(Thread-1746, initial)>
running thread  <Thread(Thread-1747, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.69it/s]


current loss: [[45.77639063]]
running thread  <Thread(Thread-1748, initial)>
running thread  <Thread(Thread-1749, initial)>
running thread  <Thread(Thread-1750, initial)>
running thread  <Thread(Thread-1751, initial)>
running thread  <Thread(Thread-1752, initial)>
running thread  <Thread(Thread-1753, initial)>
running thread  <Thread(Thread-1754, initial)>
running thread  <Thread(Thread-1755, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.30it/s]


current loss: [[3.89792075e+13]]
running thread  <Thread(Thread-1756, initial)>
running thread  <Thread(Thread-1757, initial)>
running thread  <Thread(Thread-1758, initial)>
running thread  <Thread(Thread-1759, initial)>
running thread  <Thread(Thread-1760, initial)>
running thread  <Thread(Thread-1761, initial)>
running thread  <Thread(Thread-1762, initial)>
running thread  <Thread(Thread-1763, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.88it/s]


current loss: [[46.33674605]]
running thread  <Thread(Thread-1764, initial)>
running thread  <Thread(Thread-1765, initial)>
running thread  <Thread(Thread-1766, initial)>
running thread  <Thread(Thread-1767, initial)>
running thread  <Thread(Thread-1768, initial)>
running thread  <Thread(Thread-1769, initial)>
running thread  <Thread(Thread-1770, initial)>
running thread  <Thread(Thread-1771, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.41it/s]


current loss: [[6.1429527e+244]]
running thread  <Thread(Thread-1772, initial)>
running thread  <Thread(Thread-1773, initial)>
running thread  <Thread(Thread-1774, initial)>
running thread  <Thread(Thread-1775, initial)>
running thread  <Thread(Thread-1776, initial)>
running thread  <Thread(Thread-1777, initial)>
running thread  <Thread(Thread-1778, initial)>
running thread  <Thread(Thread-1779, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 15.80it/s]


current loss: [[48.23291228]]
running thread  <Thread(Thread-1780, initial)>
running thread  <Thread(Thread-1781, initial)>
running thread  <Thread(Thread-1782, initial)>
running thread  <Thread(Thread-1783, initial)>
running thread  <Thread(Thread-1784, initial)>
running thread  <Thread(Thread-1785, initial)>
running thread  <Thread(Thread-1786, initial)>
running thread  <Thread(Thread-1787, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.86it/s]


current loss: [[47.10385302]]
running thread  <Thread(Thread-1788, initial)>
running thread  <Thread(Thread-1789, initial)>
running thread  <Thread(Thread-1790, initial)>
running thread  <Thread(Thread-1791, initial)>
running thread  <Thread(Thread-1792, initial)>
running thread  <Thread(Thread-1793, initial)>
running thread  <Thread(Thread-1794, initial)>
running thread  <Thread(Thread-1795, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.99it/s]


current loss: [[nan]]
running thread  <Thread(Thread-1796, initial)>
running thread  <Thread(Thread-1797, initial)>
running thread  <Thread(Thread-1798, initial)>
running thread  <Thread(Thread-1799, initial)>
running thread  <Thread(Thread-1800, initial)>
running thread  <Thread(Thread-1801, initial)>
running thread  <Thread(Thread-1802, initial)>
running thread  <Thread(Thread-1803, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.63it/s]


current loss: [[7.42173519e+228]]
running thread  <Thread(Thread-1804, initial)>
running thread  <Thread(Thread-1805, initial)>
running thread  <Thread(Thread-1806, initial)>
running thread  <Thread(Thread-1807, initial)>
running thread  <Thread(Thread-1808, initial)>
running thread  <Thread(Thread-1809, initial)>
running thread  <Thread(Thread-1810, initial)>
running thread  <Thread(Thread-1811, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.66it/s]


current loss: [[1.62786754e+130]]
running thread  <Thread(Thread-1812, initial)>
running thread  <Thread(Thread-1813, initial)>
running thread  <Thread(Thread-1814, initial)>
running thread  <Thread(Thread-1815, initial)>
running thread  <Thread(Thread-1816, initial)>
running thread  <Thread(Thread-1817, initial)>
running thread  <Thread(Thread-1818, initial)>
running thread  <Thread(Thread-1819, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.28it/s]


current loss: [[43.94055111]]
running thread  <Thread(Thread-1820, initial)>
running thread  <Thread(Thread-1821, initial)>
running thread  <Thread(Thread-1822, initial)>
running thread  <Thread(Thread-1823, initial)>
running thread  <Thread(Thread-1824, initial)>
running thread  <Thread(Thread-1825, initial)>
running thread  <Thread(Thread-1826, initial)>
running thread  <Thread(Thread-1827, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.49it/s]


current loss: [[44.87564114]]
running thread  <Thread(Thread-1828, initial)>
running thread  <Thread(Thread-1829, initial)>
running thread  <Thread(Thread-1830, initial)>
running thread  <Thread(Thread-1831, initial)>
running thread  <Thread(Thread-1832, initial)>
running thread  <Thread(Thread-1833, initial)>
running thread  <Thread(Thread-1834, initial)>
running thread  <Thread(Thread-1835, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 12.83it/s]


current loss: [[49.90130989]]
running thread  <Thread(Thread-1836, initial)>
running thread  <Thread(Thread-1837, initial)>
running thread  <Thread(Thread-1838, initial)>
running thread  <Thread(Thread-1839, initial)>
running thread  <Thread(Thread-1840, initial)>
running thread  <Thread(Thread-1841, initial)>
running thread  <Thread(Thread-1842, initial)>
running thread  <Thread(Thread-1843, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.02it/s]


current loss: [[46.39315164]]
running thread  <Thread(Thread-1844, initial)>
running thread  <Thread(Thread-1845, initial)>
running thread  <Thread(Thread-1846, initial)>
running thread  <Thread(Thread-1847, initial)>
running thread  <Thread(Thread-1848, initial)>
running thread  <Thread(Thread-1849, initial)>
running thread  <Thread(Thread-1850, initial)>
running thread  <Thread(Thread-1851, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.04it/s]


current loss: [[44.69163941]]
running thread  <Thread(Thread-1852, initial)>
running thread  <Thread(Thread-1853, initial)>
running thread  <Thread(Thread-1854, initial)>
running thread  <Thread(Thread-1855, initial)>
running thread  <Thread(Thread-1856, initial)>
running thread  <Thread(Thread-1857, initial)>
running thread  <Thread(Thread-1858, initial)>
running thread  <Thread(Thread-1859, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.78it/s]


current loss: [[nan]]
running thread  <Thread(Thread-1860, initial)>
running thread  <Thread(Thread-1861, initial)>
running thread  <Thread(Thread-1862, initial)>
running thread  <Thread(Thread-1863, initial)>
running thread  <Thread(Thread-1864, initial)>
running thread  <Thread(Thread-1865, initial)>
running thread  <Thread(Thread-1866, initial)>
running thread  <Thread(Thread-1867, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.30it/s]


current loss: [[49.43369433]]
running thread  <Thread(Thread-1868, initial)>
running thread  <Thread(Thread-1869, initial)>
running thread  <Thread(Thread-1870, initial)>
running thread  <Thread(Thread-1871, initial)>
running thread  <Thread(Thread-1872, initial)>
running thread  <Thread(Thread-1873, initial)>
running thread  <Thread(Thread-1874, initial)>
running thread  <Thread(Thread-1875, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 12.85it/s]


current loss: [[1.12465915e+37]]
running thread  <Thread(Thread-1876, initial)>
running thread  <Thread(Thread-1877, initial)>
running thread  <Thread(Thread-1878, initial)>
running thread  <Thread(Thread-1879, initial)>
running thread  <Thread(Thread-1880, initial)>
running thread  <Thread(Thread-1881, initial)>
running thread  <Thread(Thread-1882, initial)>
running thread  <Thread(Thread-1883, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.46it/s]


current loss: [[3.02614367e+144]]
running thread  <Thread(Thread-1884, initial)>
running thread  <Thread(Thread-1885, initial)>
running thread  <Thread(Thread-1886, initial)>
running thread  <Thread(Thread-1887, initial)>
running thread  <Thread(Thread-1888, initial)>
running thread  <Thread(Thread-1889, initial)>
running thread  <Thread(Thread-1890, initial)>
running thread  <Thread(Thread-1891, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.81it/s]


current loss: [[nan]]
running thread  <Thread(Thread-1892, initial)>
running thread  <Thread(Thread-1893, initial)>
running thread  <Thread(Thread-1894, initial)>
running thread  <Thread(Thread-1895, initial)>
running thread  <Thread(Thread-1896, initial)>
running thread  <Thread(Thread-1897, initial)>
running thread  <Thread(Thread-1898, initial)>
running thread  <Thread(Thread-1899, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 13.15it/s]


current loss: [[8.052314e+158]]
running thread  <Thread(Thread-1900, initial)>
running thread  <Thread(Thread-1901, initial)>
running thread  <Thread(Thread-1902, initial)>
running thread  <Thread(Thread-1903, initial)>
running thread  <Thread(Thread-1904, initial)>
running thread  <Thread(Thread-1905, initial)>
running thread  <Thread(Thread-1906, initial)>
running thread  <Thread(Thread-1907, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.11it/s]


current loss: [[8.95603867e+180]]
running thread  <Thread(Thread-1908, initial)>
running thread  <Thread(Thread-1909, initial)>
running thread  <Thread(Thread-1910, initial)>
running thread  <Thread(Thread-1911, initial)>
running thread  <Thread(Thread-1912, initial)>
running thread  <Thread(Thread-1913, initial)>
running thread  <Thread(Thread-1914, initial)>
running thread  <Thread(Thread-1915, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.74it/s]


current loss: [[48.52422209]]
running thread  <Thread(Thread-1916, initial)>
running thread  <Thread(Thread-1917, initial)>
running thread  <Thread(Thread-1918, initial)>
running thread  <Thread(Thread-1919, initial)>
running thread  <Thread(Thread-1920, initial)>
running thread  <Thread(Thread-1921, initial)>
running thread  <Thread(Thread-1922, initial)>
running thread  <Thread(Thread-1923, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.05it/s]


current loss: [[41.63906529]]
running thread  <Thread(Thread-1924, initial)>
running thread  <Thread(Thread-1925, initial)>
running thread  <Thread(Thread-1926, initial)>
running thread  <Thread(Thread-1927, initial)>
running thread  <Thread(Thread-1928, initial)>
running thread  <Thread(Thread-1929, initial)>
running thread  <Thread(Thread-1930, initial)>
running thread  <Thread(Thread-1931, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.13it/s]


current loss: [[45.92123626]]
running thread  <Thread(Thread-1932, initial)>
running thread  <Thread(Thread-1933, initial)>
running thread  <Thread(Thread-1934, initial)>
running thread  <Thread(Thread-1935, initial)>
running thread  <Thread(Thread-1936, initial)>
running thread  <Thread(Thread-1937, initial)>
running thread  <Thread(Thread-1938, initial)>
running thread  <Thread(Thread-1939, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.49it/s]


current loss: [[5.31672749e+26]]
running thread  <Thread(Thread-1940, initial)>
running thread  <Thread(Thread-1941, initial)>
running thread  <Thread(Thread-1942, initial)>
running thread  <Thread(Thread-1943, initial)>
running thread  <Thread(Thread-1944, initial)>
running thread  <Thread(Thread-1945, initial)>
running thread  <Thread(Thread-1946, initial)>
running thread  <Thread(Thread-1947, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.30it/s]


current loss: [[46.05586349]]
running thread  <Thread(Thread-1948, initial)>
running thread  <Thread(Thread-1949, initial)>
running thread  <Thread(Thread-1950, initial)>
running thread  <Thread(Thread-1951, initial)>
running thread  <Thread(Thread-1952, initial)>
running thread  <Thread(Thread-1953, initial)>
running thread  <Thread(Thread-1954, initial)>
running thread  <Thread(Thread-1955, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 15.81it/s]


current loss: [[46.97226908]]
running thread  <Thread(Thread-1956, initial)>
running thread  <Thread(Thread-1957, initial)>
running thread  <Thread(Thread-1958, initial)>
running thread  <Thread(Thread-1959, initial)>
running thread  <Thread(Thread-1960, initial)>
running thread  <Thread(Thread-1961, initial)>
running thread  <Thread(Thread-1962, initial)>
running thread  <Thread(Thread-1963, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 13.21it/s]


current loss: [[5.43643497e+123]]
running thread  <Thread(Thread-1964, initial)>
running thread  <Thread(Thread-1965, initial)>
running thread  <Thread(Thread-1966, initial)>
running thread  <Thread(Thread-1967, initial)>
running thread  <Thread(Thread-1968, initial)>
running thread  <Thread(Thread-1969, initial)>
running thread  <Thread(Thread-1970, initial)>
running thread  <Thread(Thread-1971, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.96it/s]


current loss: [[4.79481877e+254]]
running thread  <Thread(Thread-1972, initial)>
running thread  <Thread(Thread-1973, initial)>
running thread  <Thread(Thread-1974, initial)>
running thread  <Thread(Thread-1975, initial)>
running thread  <Thread(Thread-1976, initial)>
running thread  <Thread(Thread-1977, initial)>
running thread  <Thread(Thread-1978, initial)>
running thread  <Thread(Thread-1979, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.63it/s]


current loss: [[49.28929845]]
running thread  <Thread(Thread-1980, initial)>
running thread  <Thread(Thread-1981, initial)>
running thread  <Thread(Thread-1982, initial)>
running thread  <Thread(Thread-1983, initial)>
running thread  <Thread(Thread-1984, initial)>
running thread  <Thread(Thread-1985, initial)>
running thread  <Thread(Thread-1986, initial)>
running thread  <Thread(Thread-1987, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 15.80it/s]


current loss: [[7.70323084e+215]]
running thread  <Thread(Thread-1988, initial)>
running thread  <Thread(Thread-1989, initial)>
running thread  <Thread(Thread-1990, initial)>
running thread  <Thread(Thread-1991, initial)>
running thread  <Thread(Thread-1992, initial)>
running thread  <Thread(Thread-1993, initial)>
running thread  <Thread(Thread-1994, initial)>
running thread  <Thread(Thread-1995, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.56it/s]


current loss: [[1.69964724e+37]]
running thread  <Thread(Thread-1996, initial)>
running thread  <Thread(Thread-1997, initial)>
running thread  <Thread(Thread-1998, initial)>
running thread  <Thread(Thread-1999, initial)>
running thread  <Thread(Thread-2000, initial)>
running thread  <Thread(Thread-2001, initial)>
running thread  <Thread(Thread-2002, initial)>
running thread  <Thread(Thread-2003, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.47it/s]


current loss: [[9.06228183e+57]]
running thread  <Thread(Thread-2004, initial)>
running thread  <Thread(Thread-2005, initial)>
running thread  <Thread(Thread-2006, initial)>
running thread  <Thread(Thread-2007, initial)>
running thread  <Thread(Thread-2008, initial)>
running thread  <Thread(Thread-2009, initial)>
running thread  <Thread(Thread-2010, initial)>
running thread  <Thread(Thread-2011, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 12.74it/s]


current loss: [[2.51064359e+238]]
running thread  <Thread(Thread-2012, initial)>
running thread  <Thread(Thread-2013, initial)>
running thread  <Thread(Thread-2014, initial)>
running thread  <Thread(Thread-2015, initial)>
running thread  <Thread(Thread-2016, initial)>
running thread  <Thread(Thread-2017, initial)>
running thread  <Thread(Thread-2018, initial)>
running thread  <Thread(Thread-2019, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.15it/s]


current loss: [[45.26614465]]
running thread  <Thread(Thread-2020, initial)>
running thread  <Thread(Thread-2021, initial)>
running thread  <Thread(Thread-2022, initial)>
running thread  <Thread(Thread-2023, initial)>
running thread  <Thread(Thread-2024, initial)>
running thread  <Thread(Thread-2025, initial)>
running thread  <Thread(Thread-2026, initial)>
running thread  <Thread(Thread-2027, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.80it/s]


current loss: [[44.43521301]]
running thread  <Thread(Thread-2028, initial)>
running thread  <Thread(Thread-2029, initial)>
running thread  <Thread(Thread-2030, initial)>
running thread  <Thread(Thread-2031, initial)>
running thread  <Thread(Thread-2032, initial)>
running thread  <Thread(Thread-2033, initial)>
running thread  <Thread(Thread-2034, initial)>
running thread  <Thread(Thread-2035, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.44it/s]


current loss: [[2.37264402e+108]]
running thread  <Thread(Thread-2036, initial)>
running thread  <Thread(Thread-2037, initial)>
running thread  <Thread(Thread-2038, initial)>
running thread  <Thread(Thread-2039, initial)>
running thread  <Thread(Thread-2040, initial)>
running thread  <Thread(Thread-2041, initial)>
running thread  <Thread(Thread-2042, initial)>
running thread  <Thread(Thread-2043, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.32it/s]


current loss: [[1.85733863e+271]]
running thread  <Thread(Thread-2044, initial)>
running thread  <Thread(Thread-2045, initial)>
running thread  <Thread(Thread-2046, initial)>
running thread  <Thread(Thread-2047, initial)>
running thread  <Thread(Thread-2048, initial)>
running thread  <Thread(Thread-2049, initial)>
running thread  <Thread(Thread-2050, initial)>
running thread  <Thread(Thread-2051, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 12.75it/s]


current loss: [[2.17720037e+113]]
running thread  <Thread(Thread-2052, initial)>
running thread  <Thread(Thread-2053, initial)>
running thread  <Thread(Thread-2054, initial)>
running thread  <Thread(Thread-2055, initial)>
running thread  <Thread(Thread-2056, initial)>
running thread  <Thread(Thread-2057, initial)>
running thread  <Thread(Thread-2058, initial)>
running thread  <Thread(Thread-2059, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.52it/s]


current loss: [[5.55461998e+174]]
running thread  <Thread(Thread-2060, initial)>
running thread  <Thread(Thread-2061, initial)>
running thread  <Thread(Thread-2062, initial)>
running thread  <Thread(Thread-2063, initial)>
running thread  <Thread(Thread-2064, initial)>
running thread  <Thread(Thread-2065, initial)>
running thread  <Thread(Thread-2066, initial)>
running thread  <Thread(Thread-2067, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.76it/s]


current loss: [[1.09089361e+220]]
running thread  <Thread(Thread-2068, initial)>
running thread  <Thread(Thread-2069, initial)>
running thread  <Thread(Thread-2070, initial)>
running thread  <Thread(Thread-2071, initial)>
running thread  <Thread(Thread-2072, initial)>
running thread  <Thread(Thread-2073, initial)>
running thread  <Thread(Thread-2074, initial)>
running thread  <Thread(Thread-2075, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 15.90it/s]


current loss: [[2.33021972e+200]]
running thread  <Thread(Thread-2076, initial)>
running thread  <Thread(Thread-2077, initial)>
running thread  <Thread(Thread-2078, initial)>
running thread  <Thread(Thread-2079, initial)>
running thread  <Thread(Thread-2080, initial)>
running thread  <Thread(Thread-2081, initial)>
running thread  <Thread(Thread-2082, initial)>
running thread  <Thread(Thread-2083, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 15.95it/s]


current loss: [[1.7144237e+99]]
running thread  <Thread(Thread-2084, initial)>
running thread  <Thread(Thread-2085, initial)>
running thread  <Thread(Thread-2086, initial)>
running thread  <Thread(Thread-2087, initial)>
running thread  <Thread(Thread-2088, initial)>
running thread  <Thread(Thread-2089, initial)>
running thread  <Thread(Thread-2090, initial)>
running thread  <Thread(Thread-2091, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.65it/s]


current loss: [[1.86226073e+120]]
running thread  <Thread(Thread-2092, initial)>
running thread  <Thread(Thread-2093, initial)>
running thread  <Thread(Thread-2094, initial)>
running thread  <Thread(Thread-2095, initial)>
running thread  <Thread(Thread-2096, initial)>
running thread  <Thread(Thread-2097, initial)>
running thread  <Thread(Thread-2098, initial)>
running thread  <Thread(Thread-2099, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 13.31it/s]


current loss: [[46.76351004]]
running thread  <Thread(Thread-2100, initial)>
running thread  <Thread(Thread-2101, initial)>
running thread  <Thread(Thread-2102, initial)>
running thread  <Thread(Thread-2103, initial)>
running thread  <Thread(Thread-2104, initial)>
running thread  <Thread(Thread-2105, initial)>
running thread  <Thread(Thread-2106, initial)>
running thread  <Thread(Thread-2107, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 15.93it/s]


current loss: [[nan]]
running thread  <Thread(Thread-2108, initial)>
running thread  <Thread(Thread-2109, initial)>
running thread  <Thread(Thread-2110, initial)>
running thread  <Thread(Thread-2111, initial)>
running thread  <Thread(Thread-2112, initial)>
running thread  <Thread(Thread-2113, initial)>
running thread  <Thread(Thread-2114, initial)>
running thread  <Thread(Thread-2115, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.44it/s]


current loss: [[1.57940827e+282]]
running thread  <Thread(Thread-2116, initial)>
running thread  <Thread(Thread-2117, initial)>
running thread  <Thread(Thread-2118, initial)>
running thread  <Thread(Thread-2119, initial)>
running thread  <Thread(Thread-2120, initial)>
running thread  <Thread(Thread-2121, initial)>
running thread  <Thread(Thread-2122, initial)>
running thread  <Thread(Thread-2123, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.81it/s]


current loss: [[49.40034759]]
running thread  <Thread(Thread-2124, initial)>
running thread  <Thread(Thread-2125, initial)>
running thread  <Thread(Thread-2126, initial)>
running thread  <Thread(Thread-2127, initial)>
running thread  <Thread(Thread-2128, initial)>
running thread  <Thread(Thread-2129, initial)>
running thread  <Thread(Thread-2130, initial)>
running thread  <Thread(Thread-2131, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.53it/s]


current loss: [[43.99237303]]
running thread  <Thread(Thread-2132, initial)>
running thread  <Thread(Thread-2133, initial)>
running thread  <Thread(Thread-2134, initial)>
running thread  <Thread(Thread-2135, initial)>
running thread  <Thread(Thread-2136, initial)>
running thread  <Thread(Thread-2137, initial)>
running thread  <Thread(Thread-2138, initial)>
running thread  <Thread(Thread-2139, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 13.02it/s]


current loss: [[1.66882521e+199]]
running thread  <Thread(Thread-2140, initial)>
running thread  <Thread(Thread-2141, initial)>
running thread  <Thread(Thread-2142, initial)>
running thread  <Thread(Thread-2143, initial)>
running thread  <Thread(Thread-2144, initial)>
running thread  <Thread(Thread-2145, initial)>
running thread  <Thread(Thread-2146, initial)>
running thread  <Thread(Thread-2147, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.78it/s]


current loss: [[4.32551687e+146]]
running thread  <Thread(Thread-2148, initial)>
running thread  <Thread(Thread-2149, initial)>
running thread  <Thread(Thread-2150, initial)>
running thread  <Thread(Thread-2151, initial)>
running thread  <Thread(Thread-2152, initial)>
running thread  <Thread(Thread-2153, initial)>
running thread  <Thread(Thread-2154, initial)>
running thread  <Thread(Thread-2155, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.60it/s]


current loss: [[46.76181288]]
running thread  <Thread(Thread-2156, initial)>
running thread  <Thread(Thread-2157, initial)>
running thread  <Thread(Thread-2158, initial)>
running thread  <Thread(Thread-2159, initial)>
running thread  <Thread(Thread-2160, initial)>
running thread  <Thread(Thread-2161, initial)>
running thread  <Thread(Thread-2162, initial)>
running thread  <Thread(Thread-2163, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.05it/s]


current loss: [[49.45165687]]
running thread  <Thread(Thread-2164, initial)>
running thread  <Thread(Thread-2165, initial)>
running thread  <Thread(Thread-2166, initial)>
running thread  <Thread(Thread-2167, initial)>
running thread  <Thread(Thread-2168, initial)>
running thread  <Thread(Thread-2169, initial)>
running thread  <Thread(Thread-2170, initial)>
running thread  <Thread(Thread-2171, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.99it/s]


current loss: [[46.8517253]]
running thread  <Thread(Thread-2172, initial)>
running thread  <Thread(Thread-2173, initial)>
running thread  <Thread(Thread-2174, initial)>
running thread  <Thread(Thread-2175, initial)>
running thread  <Thread(Thread-2176, initial)>
running thread  <Thread(Thread-2177, initial)>
running thread  <Thread(Thread-2178, initial)>
running thread  <Thread(Thread-2179, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.73it/s]


current loss: [[nan]]
running thread  <Thread(Thread-2180, initial)>
running thread  <Thread(Thread-2181, initial)>
running thread  <Thread(Thread-2182, initial)>
running thread  <Thread(Thread-2183, initial)>
running thread  <Thread(Thread-2184, initial)>
running thread  <Thread(Thread-2185, initial)>
running thread  <Thread(Thread-2186, initial)>
running thread  <Thread(Thread-2187, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.40it/s]


current loss: [[48.42485919]]
running thread  <Thread(Thread-2188, initial)>
running thread  <Thread(Thread-2189, initial)>
running thread  <Thread(Thread-2190, initial)>
running thread  <Thread(Thread-2191, initial)>
running thread  <Thread(Thread-2192, initial)>
running thread  <Thread(Thread-2193, initial)>
running thread  <Thread(Thread-2194, initial)>
running thread  <Thread(Thread-2195, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 15.85it/s]


current loss: [[8.89960461e+288]]
running thread  <Thread(Thread-2196, initial)>
running thread  <Thread(Thread-2197, initial)>
running thread  <Thread(Thread-2198, initial)>
running thread  <Thread(Thread-2199, initial)>
running thread  <Thread(Thread-2200, initial)>
running thread  <Thread(Thread-2201, initial)>
running thread  <Thread(Thread-2202, initial)>
running thread  <Thread(Thread-2203, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 15.99it/s]


current loss: [[nan]]
running thread  <Thread(Thread-2204, initial)>
running thread  <Thread(Thread-2205, initial)>
running thread  <Thread(Thread-2206, initial)>
running thread  <Thread(Thread-2207, initial)>
running thread  <Thread(Thread-2208, initial)>
running thread  <Thread(Thread-2209, initial)>
running thread  <Thread(Thread-2210, initial)>
running thread  <Thread(Thread-2211, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.21it/s]


current loss: [[4.77171553e+200]]
running thread  <Thread(Thread-2212, initial)>
running thread  <Thread(Thread-2213, initial)>
running thread  <Thread(Thread-2214, initial)>
running thread  <Thread(Thread-2215, initial)>
running thread  <Thread(Thread-2216, initial)>
running thread  <Thread(Thread-2217, initial)>
running thread  <Thread(Thread-2218, initial)>
running thread  <Thread(Thread-2219, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.23it/s]


current loss: [[2.62783677e+112]]
running thread  <Thread(Thread-2220, initial)>
running thread  <Thread(Thread-2221, initial)>
running thread  <Thread(Thread-2222, initial)>
running thread  <Thread(Thread-2223, initial)>
running thread  <Thread(Thread-2224, initial)>
running thread  <Thread(Thread-2225, initial)>
running thread  <Thread(Thread-2226, initial)>
running thread  <Thread(Thread-2227, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 13.28it/s]


current loss: [[45.28937455]]
running thread  <Thread(Thread-2228, initial)>
running thread  <Thread(Thread-2229, initial)>
running thread  <Thread(Thread-2230, initial)>
running thread  <Thread(Thread-2231, initial)>
running thread  <Thread(Thread-2232, initial)>
running thread  <Thread(Thread-2233, initial)>
running thread  <Thread(Thread-2234, initial)>
running thread  <Thread(Thread-2235, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.57it/s]


current loss: [[7.09449416e+274]]
running thread  <Thread(Thread-2236, initial)>
running thread  <Thread(Thread-2237, initial)>
running thread  <Thread(Thread-2238, initial)>
running thread  <Thread(Thread-2239, initial)>
running thread  <Thread(Thread-2240, initial)>
running thread  <Thread(Thread-2241, initial)>
running thread  <Thread(Thread-2242, initial)>
running thread  <Thread(Thread-2243, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 15.70it/s]


current loss: [[3.89080826e+184]]
running thread  <Thread(Thread-2244, initial)>
running thread  <Thread(Thread-2245, initial)>
running thread  <Thread(Thread-2246, initial)>
running thread  <Thread(Thread-2247, initial)>
running thread  <Thread(Thread-2248, initial)>
running thread  <Thread(Thread-2249, initial)>
running thread  <Thread(Thread-2250, initial)>
running thread  <Thread(Thread-2251, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.63it/s]


current loss: [[49.44804815]]
running thread  <Thread(Thread-2252, initial)>
running thread  <Thread(Thread-2253, initial)>
running thread  <Thread(Thread-2254, initial)>
running thread  <Thread(Thread-2255, initial)>
running thread  <Thread(Thread-2256, initial)>
running thread  <Thread(Thread-2257, initial)>
running thread  <Thread(Thread-2258, initial)>
running thread  <Thread(Thread-2259, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.08it/s]


current loss: [[5.72676285e+137]]
running thread  <Thread(Thread-2260, initial)>
running thread  <Thread(Thread-2261, initial)>
running thread  <Thread(Thread-2262, initial)>
running thread  <Thread(Thread-2263, initial)>
running thread  <Thread(Thread-2264, initial)>
running thread  <Thread(Thread-2265, initial)>
running thread  <Thread(Thread-2266, initial)>
running thread  <Thread(Thread-2267, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 15.63it/s]


current loss: [[51.68352887]]
running thread  <Thread(Thread-2268, initial)>
running thread  <Thread(Thread-2269, initial)>
running thread  <Thread(Thread-2270, initial)>
running thread  <Thread(Thread-2271, initial)>
running thread  <Thread(Thread-2272, initial)>
running thread  <Thread(Thread-2273, initial)>
running thread  <Thread(Thread-2274, initial)>
running thread  <Thread(Thread-2275, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 13.15it/s]


current loss: [[9.84115204e+136]]
running thread  <Thread(Thread-2276, initial)>
running thread  <Thread(Thread-2277, initial)>
running thread  <Thread(Thread-2278, initial)>
running thread  <Thread(Thread-2279, initial)>
running thread  <Thread(Thread-2280, initial)>
running thread  <Thread(Thread-2281, initial)>
running thread  <Thread(Thread-2282, initial)>
running thread  <Thread(Thread-2283, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 15.21it/s]


current loss: [[47.37332442]]
running thread  <Thread(Thread-2284, initial)>
running thread  <Thread(Thread-2285, initial)>
running thread  <Thread(Thread-2286, initial)>
running thread  <Thread(Thread-2287, initial)>
running thread  <Thread(Thread-2288, initial)>
running thread  <Thread(Thread-2289, initial)>
running thread  <Thread(Thread-2290, initial)>
running thread  <Thread(Thread-2291, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 15.97it/s]


current loss: [[9.4003632e+106]]
running thread  <Thread(Thread-2292, initial)>
running thread  <Thread(Thread-2293, initial)>
running thread  <Thread(Thread-2294, initial)>
running thread  <Thread(Thread-2295, initial)>
running thread  <Thread(Thread-2296, initial)>
running thread  <Thread(Thread-2297, initial)>
running thread  <Thread(Thread-2298, initial)>
running thread  <Thread(Thread-2299, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.43it/s]


current loss: [[2.84558443e+103]]
running thread  <Thread(Thread-2300, initial)>
running thread  <Thread(Thread-2301, initial)>
running thread  <Thread(Thread-2302, initial)>
running thread  <Thread(Thread-2303, initial)>
running thread  <Thread(Thread-2304, initial)>
running thread  <Thread(Thread-2305, initial)>
running thread  <Thread(Thread-2306, initial)>
running thread  <Thread(Thread-2307, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.37it/s]


current loss: [[8.48760476e+114]]
running thread  <Thread(Thread-2308, initial)>
running thread  <Thread(Thread-2309, initial)>
running thread  <Thread(Thread-2310, initial)>
running thread  <Thread(Thread-2311, initial)>
running thread  <Thread(Thread-2312, initial)>
running thread  <Thread(Thread-2313, initial)>
running thread  <Thread(Thread-2314, initial)>
running thread  <Thread(Thread-2315, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.05it/s]


current loss: [[49.02913246]]
running thread  <Thread(Thread-2316, initial)>
running thread  <Thread(Thread-2317, initial)>
running thread  <Thread(Thread-2318, initial)>
running thread  <Thread(Thread-2319, initial)>
running thread  <Thread(Thread-2320, initial)>
running thread  <Thread(Thread-2321, initial)>
running thread  <Thread(Thread-2322, initial)>
running thread  <Thread(Thread-2323, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.55it/s]


current loss: [[nan]]
running thread  <Thread(Thread-2324, initial)>
running thread  <Thread(Thread-2325, initial)>
running thread  <Thread(Thread-2326, initial)>
running thread  <Thread(Thread-2327, initial)>
running thread  <Thread(Thread-2328, initial)>
running thread  <Thread(Thread-2329, initial)>
running thread  <Thread(Thread-2330, initial)>
running thread  <Thread(Thread-2331, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.38it/s]


current loss: [[48.64035105]]
running thread  <Thread(Thread-2332, initial)>
running thread  <Thread(Thread-2333, initial)>
running thread  <Thread(Thread-2334, initial)>
running thread  <Thread(Thread-2335, initial)>
running thread  <Thread(Thread-2336, initial)>
running thread  <Thread(Thread-2337, initial)>
running thread  <Thread(Thread-2338, initial)>
running thread  <Thread(Thread-2339, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.25it/s]


current loss: [[1.53334226e+300]]
running thread  <Thread(Thread-2340, initial)>
running thread  <Thread(Thread-2341, initial)>
running thread  <Thread(Thread-2342, initial)>
running thread  <Thread(Thread-2343, initial)>
running thread  <Thread(Thread-2344, initial)>
running thread  <Thread(Thread-2345, initial)>
running thread  <Thread(Thread-2346, initial)>
running thread  <Thread(Thread-2347, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.80it/s]


current loss: [[42.26321395]]
running thread  <Thread(Thread-2348, initial)>
running thread  <Thread(Thread-2349, initial)>
running thread  <Thread(Thread-2350, initial)>
running thread  <Thread(Thread-2351, initial)>
running thread  <Thread(Thread-2352, initial)>
running thread  <Thread(Thread-2353, initial)>
running thread  <Thread(Thread-2354, initial)>
running thread  <Thread(Thread-2355, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 15.29it/s]


current loss: [[4.83622314e+75]]
running thread  <Thread(Thread-2356, initial)>
running thread  <Thread(Thread-2357, initial)>
running thread  <Thread(Thread-2358, initial)>
running thread  <Thread(Thread-2359, initial)>
running thread  <Thread(Thread-2360, initial)>
running thread  <Thread(Thread-2361, initial)>
running thread  <Thread(Thread-2362, initial)>
running thread  <Thread(Thread-2363, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 12.47it/s]


current loss: [[49.90130989]]
running thread  <Thread(Thread-2364, initial)>
running thread  <Thread(Thread-2365, initial)>
running thread  <Thread(Thread-2366, initial)>
running thread  <Thread(Thread-2367, initial)>
running thread  <Thread(Thread-2368, initial)>
running thread  <Thread(Thread-2369, initial)>
running thread  <Thread(Thread-2370, initial)>
running thread  <Thread(Thread-2371, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 15.13it/s]


current loss: [[6.32230276e+182]]
running thread  <Thread(Thread-2372, initial)>
running thread  <Thread(Thread-2373, initial)>
running thread  <Thread(Thread-2374, initial)>
running thread  <Thread(Thread-2375, initial)>
running thread  <Thread(Thread-2376, initial)>
running thread  <Thread(Thread-2377, initial)>
running thread  <Thread(Thread-2378, initial)>
running thread  <Thread(Thread-2379, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.87it/s]


current loss: [[49.19729434]]
running thread  <Thread(Thread-2380, initial)>
running thread  <Thread(Thread-2381, initial)>
running thread  <Thread(Thread-2382, initial)>
running thread  <Thread(Thread-2383, initial)>
running thread  <Thread(Thread-2384, initial)>
running thread  <Thread(Thread-2385, initial)>
running thread  <Thread(Thread-2386, initial)>
running thread  <Thread(Thread-2387, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.49it/s]


current loss: [[4.42559965e+150]]
running thread  <Thread(Thread-2388, initial)>
running thread  <Thread(Thread-2389, initial)>
running thread  <Thread(Thread-2390, initial)>
running thread  <Thread(Thread-2391, initial)>
running thread  <Thread(Thread-2392, initial)>
running thread  <Thread(Thread-2393, initial)>
running thread  <Thread(Thread-2394, initial)>
running thread  <Thread(Thread-2395, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.05it/s]


current loss: [[nan]]
running thread  <Thread(Thread-2396, initial)>
running thread  <Thread(Thread-2397, initial)>
running thread  <Thread(Thread-2398, initial)>
running thread  <Thread(Thread-2399, initial)>
running thread  <Thread(Thread-2400, initial)>
running thread  <Thread(Thread-2401, initial)>
running thread  <Thread(Thread-2402, initial)>
running thread  <Thread(Thread-2403, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.14it/s]


current loss: [[2.61899886e+214]]
running thread  <Thread(Thread-2404, initial)>
running thread  <Thread(Thread-2405, initial)>
running thread  <Thread(Thread-2406, initial)>
running thread  <Thread(Thread-2407, initial)>
running thread  <Thread(Thread-2408, initial)>
running thread  <Thread(Thread-2409, initial)>
running thread  <Thread(Thread-2410, initial)>
running thread  <Thread(Thread-2411, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 12.93it/s]


current loss: [[1.03838246e+235]]
running thread  <Thread(Thread-2412, initial)>
running thread  <Thread(Thread-2413, initial)>
running thread  <Thread(Thread-2414, initial)>
running thread  <Thread(Thread-2415, initial)>
running thread  <Thread(Thread-2416, initial)>
running thread  <Thread(Thread-2417, initial)>
running thread  <Thread(Thread-2418, initial)>
running thread  <Thread(Thread-2419, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.43it/s]


current loss: [[48.21332852]]
running thread  <Thread(Thread-2420, initial)>
running thread  <Thread(Thread-2421, initial)>
running thread  <Thread(Thread-2422, initial)>
running thread  <Thread(Thread-2423, initial)>
running thread  <Thread(Thread-2424, initial)>
running thread  <Thread(Thread-2425, initial)>
running thread  <Thread(Thread-2426, initial)>
running thread  <Thread(Thread-2427, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.32it/s]


current loss: [[49.90130989]]
running thread  <Thread(Thread-2428, initial)>
running thread  <Thread(Thread-2429, initial)>
running thread  <Thread(Thread-2430, initial)>
running thread  <Thread(Thread-2431, initial)>
running thread  <Thread(Thread-2432, initial)>
running thread  <Thread(Thread-2433, initial)>
running thread  <Thread(Thread-2434, initial)>
running thread  <Thread(Thread-2435, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.70it/s]


current loss: [[2.00409895e+76]]
running thread  <Thread(Thread-2436, initial)>
running thread  <Thread(Thread-2437, initial)>
running thread  <Thread(Thread-2438, initial)>
running thread  <Thread(Thread-2439, initial)>
running thread  <Thread(Thread-2440, initial)>
running thread  <Thread(Thread-2441, initial)>
running thread  <Thread(Thread-2442, initial)>
running thread  <Thread(Thread-2443, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.19it/s]


current loss: [[47.26403141]]
running thread  <Thread(Thread-2444, initial)>
running thread  <Thread(Thread-2445, initial)>
running thread  <Thread(Thread-2446, initial)>
running thread  <Thread(Thread-2447, initial)>
running thread  <Thread(Thread-2448, initial)>
running thread  <Thread(Thread-2449, initial)>
running thread  <Thread(Thread-2450, initial)>
running thread  <Thread(Thread-2451, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 12.99it/s]


current loss: [[6.74447254e+45]]
running thread  <Thread(Thread-2452, initial)>
running thread  <Thread(Thread-2453, initial)>
running thread  <Thread(Thread-2454, initial)>
running thread  <Thread(Thread-2455, initial)>
running thread  <Thread(Thread-2456, initial)>
running thread  <Thread(Thread-2457, initial)>
running thread  <Thread(Thread-2458, initial)>
running thread  <Thread(Thread-2459, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.15it/s]


current loss: [[49.90130989]]
running thread  <Thread(Thread-2460, initial)>
running thread  <Thread(Thread-2461, initial)>
running thread  <Thread(Thread-2462, initial)>
running thread  <Thread(Thread-2463, initial)>
running thread  <Thread(Thread-2464, initial)>
running thread  <Thread(Thread-2465, initial)>
running thread  <Thread(Thread-2466, initial)>
running thread  <Thread(Thread-2467, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.81it/s]


current loss: [[nan]]
running thread  <Thread(Thread-2468, initial)>
running thread  <Thread(Thread-2469, initial)>
running thread  <Thread(Thread-2470, initial)>
running thread  <Thread(Thread-2471, initial)>
running thread  <Thread(Thread-2472, initial)>
running thread  <Thread(Thread-2473, initial)>
running thread  <Thread(Thread-2474, initial)>
running thread  <Thread(Thread-2475, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.44it/s]


current loss: [[nan]]
running thread  <Thread(Thread-2476, initial)>
running thread  <Thread(Thread-2477, initial)>
running thread  <Thread(Thread-2478, initial)>
running thread  <Thread(Thread-2479, initial)>
running thread  <Thread(Thread-2480, initial)>
running thread  <Thread(Thread-2481, initial)>
running thread  <Thread(Thread-2482, initial)>
running thread  <Thread(Thread-2483, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.53it/s]


current loss: [[2.42761955e+202]]
running thread  <Thread(Thread-2484, initial)>
running thread  <Thread(Thread-2485, initial)>
running thread  <Thread(Thread-2486, initial)>
running thread  <Thread(Thread-2487, initial)>
running thread  <Thread(Thread-2488, initial)>
running thread  <Thread(Thread-2489, initial)>
running thread  <Thread(Thread-2490, initial)>
running thread  <Thread(Thread-2491, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 11.29it/s]


current loss: [[nan]]
running thread  <Thread(Thread-2492, initial)>
running thread  <Thread(Thread-2493, initial)>
running thread  <Thread(Thread-2494, initial)>
running thread  <Thread(Thread-2495, initial)>
running thread  <Thread(Thread-2496, initial)>
running thread  <Thread(Thread-2497, initial)>
running thread  <Thread(Thread-2498, initial)>
running thread  <Thread(Thread-2499, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 11.04it/s]


current loss: [[476.70688028]]
running thread  <Thread(Thread-2500, initial)>
running thread  <Thread(Thread-2501, initial)>
running thread  <Thread(Thread-2502, initial)>
running thread  <Thread(Thread-2503, initial)>
running thread  <Thread(Thread-2504, initial)>
running thread  <Thread(Thread-2505, initial)>
running thread  <Thread(Thread-2506, initial)>
running thread  <Thread(Thread-2507, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 12.96it/s]


current loss: [[49.90130989]]
running thread  <Thread(Thread-2508, initial)>
running thread  <Thread(Thread-2509, initial)>
running thread  <Thread(Thread-2510, initial)>
running thread  <Thread(Thread-2511, initial)>
running thread  <Thread(Thread-2512, initial)>
running thread  <Thread(Thread-2513, initial)>
running thread  <Thread(Thread-2514, initial)>
running thread  <Thread(Thread-2515, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.00it/s]


current loss: [[6.65101194e+299]]
running thread  <Thread(Thread-2516, initial)>
running thread  <Thread(Thread-2517, initial)>
running thread  <Thread(Thread-2518, initial)>
running thread  <Thread(Thread-2519, initial)>
running thread  <Thread(Thread-2520, initial)>
running thread  <Thread(Thread-2521, initial)>
running thread  <Thread(Thread-2522, initial)>
running thread  <Thread(Thread-2523, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.85it/s]


current loss: [[5.68751471e+157]]
running thread  <Thread(Thread-2524, initial)>
running thread  <Thread(Thread-2525, initial)>
running thread  <Thread(Thread-2526, initial)>
running thread  <Thread(Thread-2527, initial)>
running thread  <Thread(Thread-2528, initial)>
running thread  <Thread(Thread-2529, initial)>
running thread  <Thread(Thread-2530, initial)>
running thread  <Thread(Thread-2531, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.96it/s]


current loss: [[45.07728206]]
running thread  <Thread(Thread-2532, initial)>
running thread  <Thread(Thread-2533, initial)>
running thread  <Thread(Thread-2534, initial)>
running thread  <Thread(Thread-2535, initial)>
running thread  <Thread(Thread-2536, initial)>
running thread  <Thread(Thread-2537, initial)>
running thread  <Thread(Thread-2538, initial)>
running thread  <Thread(Thread-2539, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.60it/s]


current loss: [[8.82804233e+147]]
running thread  <Thread(Thread-2540, initial)>
running thread  <Thread(Thread-2541, initial)>
running thread  <Thread(Thread-2542, initial)>
running thread  <Thread(Thread-2543, initial)>
running thread  <Thread(Thread-2544, initial)>
running thread  <Thread(Thread-2545, initial)>
running thread  <Thread(Thread-2546, initial)>
running thread  <Thread(Thread-2547, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 13.28it/s]


current loss: [[5.34586304e+127]]
running thread  <Thread(Thread-2548, initial)>
running thread  <Thread(Thread-2549, initial)>
running thread  <Thread(Thread-2550, initial)>
running thread  <Thread(Thread-2551, initial)>
running thread  <Thread(Thread-2552, initial)>
running thread  <Thread(Thread-2553, initial)>
running thread  <Thread(Thread-2554, initial)>
running thread  <Thread(Thread-2555, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 15.64it/s]


current loss: [[49.90130989]]
running thread  <Thread(Thread-2556, initial)>
running thread  <Thread(Thread-2557, initial)>
running thread  <Thread(Thread-2558, initial)>
running thread  <Thread(Thread-2559, initial)>
running thread  <Thread(Thread-2560, initial)>
running thread  <Thread(Thread-2561, initial)>
running thread  <Thread(Thread-2562, initial)>
running thread  <Thread(Thread-2563, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.02it/s]


current loss: [[43.69432721]]
running thread  <Thread(Thread-2564, initial)>
running thread  <Thread(Thread-2565, initial)>
running thread  <Thread(Thread-2566, initial)>
running thread  <Thread(Thread-2567, initial)>
running thread  <Thread(Thread-2568, initial)>
running thread  <Thread(Thread-2569, initial)>
running thread  <Thread(Thread-2570, initial)>
running thread  <Thread(Thread-2571, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 17.08it/s]


current loss: [[5.42241506e+245]]
running thread  <Thread(Thread-2572, initial)>
running thread  <Thread(Thread-2573, initial)>
running thread  <Thread(Thread-2574, initial)>
running thread  <Thread(Thread-2575, initial)>
running thread  <Thread(Thread-2576, initial)>
running thread  <Thread(Thread-2577, initial)>
running thread  <Thread(Thread-2578, initial)>
running thread  <Thread(Thread-2579, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.49it/s]


current loss: [[nan]]
running thread  <Thread(Thread-2580, initial)>
running thread  <Thread(Thread-2581, initial)>
running thread  <Thread(Thread-2582, initial)>
running thread  <Thread(Thread-2583, initial)>
running thread  <Thread(Thread-2584, initial)>
running thread  <Thread(Thread-2585, initial)>
running thread  <Thread(Thread-2586, initial)>
running thread  <Thread(Thread-2587, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.34it/s]


current loss: [[49.80750998]]
running thread  <Thread(Thread-2588, initial)>
running thread  <Thread(Thread-2589, initial)>
running thread  <Thread(Thread-2590, initial)>
running thread  <Thread(Thread-2591, initial)>
running thread  <Thread(Thread-2592, initial)>
running thread  <Thread(Thread-2593, initial)>
running thread  <Thread(Thread-2594, initial)>
running thread  <Thread(Thread-2595, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 12.63it/s]


current loss: [[5.78101462e+32]]
running thread  <Thread(Thread-2596, initial)>
running thread  <Thread(Thread-2597, initial)>
running thread  <Thread(Thread-2598, initial)>
running thread  <Thread(Thread-2599, initial)>
running thread  <Thread(Thread-2600, initial)>
running thread  <Thread(Thread-2601, initial)>
running thread  <Thread(Thread-2602, initial)>
running thread  <Thread(Thread-2603, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.40it/s]


current loss: [[1.18122655e+60]]
running thread  <Thread(Thread-2604, initial)>
running thread  <Thread(Thread-2605, initial)>
running thread  <Thread(Thread-2606, initial)>
running thread  <Thread(Thread-2607, initial)>
running thread  <Thread(Thread-2608, initial)>
running thread  <Thread(Thread-2609, initial)>
running thread  <Thread(Thread-2610, initial)>
running thread  <Thread(Thread-2611, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.33it/s]


current loss: [[7.45880754e+88]]
running thread  <Thread(Thread-2612, initial)>
running thread  <Thread(Thread-2613, initial)>
running thread  <Thread(Thread-2614, initial)>
running thread  <Thread(Thread-2615, initial)>
running thread  <Thread(Thread-2616, initial)>
running thread  <Thread(Thread-2617, initial)>
running thread  <Thread(Thread-2618, initial)>
running thread  <Thread(Thread-2619, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.87it/s]


current loss: [[nan]]
running thread  <Thread(Thread-2620, initial)>
running thread  <Thread(Thread-2621, initial)>
running thread  <Thread(Thread-2622, initial)>
running thread  <Thread(Thread-2623, initial)>
running thread  <Thread(Thread-2624, initial)>
running thread  <Thread(Thread-2625, initial)>
running thread  <Thread(Thread-2626, initial)>
running thread  <Thread(Thread-2627, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 15.65it/s]


current loss: [[1.30717389e+290]]
running thread  <Thread(Thread-2628, initial)>
running thread  <Thread(Thread-2629, initial)>
running thread  <Thread(Thread-2630, initial)>
running thread  <Thread(Thread-2631, initial)>
running thread  <Thread(Thread-2632, initial)>
running thread  <Thread(Thread-2633, initial)>
running thread  <Thread(Thread-2634, initial)>
running thread  <Thread(Thread-2635, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 13.08it/s]


current loss: [[1.26040173e+186]]
running thread  <Thread(Thread-2636, initial)>
running thread  <Thread(Thread-2637, initial)>
running thread  <Thread(Thread-2638, initial)>
running thread  <Thread(Thread-2639, initial)>
running thread  <Thread(Thread-2640, initial)>
running thread  <Thread(Thread-2641, initial)>
running thread  <Thread(Thread-2642, initial)>
running thread  <Thread(Thread-2643, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 15.01it/s]


current loss: [[nan]]
running thread  <Thread(Thread-2644, initial)>
running thread  <Thread(Thread-2645, initial)>
running thread  <Thread(Thread-2646, initial)>
running thread  <Thread(Thread-2647, initial)>
running thread  <Thread(Thread-2648, initial)>
running thread  <Thread(Thread-2649, initial)>
running thread  <Thread(Thread-2650, initial)>
running thread  <Thread(Thread-2651, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 15.74it/s]


current loss: [[1.27784336e+141]]
running thread  <Thread(Thread-2652, initial)>
running thread  <Thread(Thread-2653, initial)>
running thread  <Thread(Thread-2654, initial)>
running thread  <Thread(Thread-2655, initial)>
running thread  <Thread(Thread-2656, initial)>
running thread  <Thread(Thread-2657, initial)>
running thread  <Thread(Thread-2658, initial)>
running thread  <Thread(Thread-2659, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.94it/s]


current loss: [[nan]]
running thread  <Thread(Thread-2660, initial)>
running thread  <Thread(Thread-2661, initial)>
running thread  <Thread(Thread-2662, initial)>
running thread  <Thread(Thread-2663, initial)>
running thread  <Thread(Thread-2664, initial)>
running thread  <Thread(Thread-2665, initial)>
running thread  <Thread(Thread-2666, initial)>
running thread  <Thread(Thread-2667, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.96it/s]


current loss: [[44.92500207]]
running thread  <Thread(Thread-2668, initial)>
running thread  <Thread(Thread-2669, initial)>
running thread  <Thread(Thread-2670, initial)>
running thread  <Thread(Thread-2671, initial)>
running thread  <Thread(Thread-2672, initial)>
running thread  <Thread(Thread-2673, initial)>
running thread  <Thread(Thread-2674, initial)>
running thread  <Thread(Thread-2675, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.58it/s]


current loss: [[nan]]
running thread  <Thread(Thread-2676, initial)>
running thread  <Thread(Thread-2677, initial)>
running thread  <Thread(Thread-2678, initial)>
running thread  <Thread(Thread-2679, initial)>
running thread  <Thread(Thread-2680, initial)>
running thread  <Thread(Thread-2681, initial)>
running thread  <Thread(Thread-2682, initial)>
running thread  <Thread(Thread-2683, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 12.82it/s]


current loss: [[6.03121015e+50]]
running thread  <Thread(Thread-2684, initial)>
running thread  <Thread(Thread-2685, initial)>
running thread  <Thread(Thread-2686, initial)>
running thread  <Thread(Thread-2687, initial)>
running thread  <Thread(Thread-2688, initial)>
running thread  <Thread(Thread-2689, initial)>
running thread  <Thread(Thread-2690, initial)>
running thread  <Thread(Thread-2691, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.26it/s]


current loss: [[5.54556371e+88]]
running thread  <Thread(Thread-2692, initial)>
running thread  <Thread(Thread-2693, initial)>
running thread  <Thread(Thread-2694, initial)>
running thread  <Thread(Thread-2695, initial)>
running thread  <Thread(Thread-2696, initial)>
running thread  <Thread(Thread-2697, initial)>
running thread  <Thread(Thread-2698, initial)>
running thread  <Thread(Thread-2699, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.56it/s]


current loss: [[1.08601012e+246]]
running thread  <Thread(Thread-2700, initial)>
running thread  <Thread(Thread-2701, initial)>
running thread  <Thread(Thread-2702, initial)>
running thread  <Thread(Thread-2703, initial)>
running thread  <Thread(Thread-2704, initial)>
running thread  <Thread(Thread-2705, initial)>
running thread  <Thread(Thread-2706, initial)>
running thread  <Thread(Thread-2707, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.50it/s]


current loss: [[49.90130989]]
running thread  <Thread(Thread-2708, initial)>
running thread  <Thread(Thread-2709, initial)>
running thread  <Thread(Thread-2710, initial)>
running thread  <Thread(Thread-2711, initial)>
running thread  <Thread(Thread-2712, initial)>
running thread  <Thread(Thread-2713, initial)>
running thread  <Thread(Thread-2714, initial)>
running thread  <Thread(Thread-2715, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.88it/s]


current loss: [[4.14504651e+154]]
running thread  <Thread(Thread-2716, initial)>
running thread  <Thread(Thread-2717, initial)>
running thread  <Thread(Thread-2718, initial)>
running thread  <Thread(Thread-2719, initial)>
running thread  <Thread(Thread-2720, initial)>
running thread  <Thread(Thread-2721, initial)>
running thread  <Thread(Thread-2722, initial)>
running thread  <Thread(Thread-2723, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 12.76it/s]


current loss: [[1.33240494e+151]]
running thread  <Thread(Thread-2724, initial)>
running thread  <Thread(Thread-2725, initial)>
running thread  <Thread(Thread-2726, initial)>
running thread  <Thread(Thread-2727, initial)>
running thread  <Thread(Thread-2728, initial)>
running thread  <Thread(Thread-2729, initial)>
running thread  <Thread(Thread-2730, initial)>
running thread  <Thread(Thread-2731, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 14.41it/s]


current loss: [[49.90130989]]
running thread  <Thread(Thread-2732, initial)>
running thread  <Thread(Thread-2733, initial)>
running thread  <Thread(Thread-2734, initial)>
running thread  <Thread(Thread-2735, initial)>
running thread  <Thread(Thread-2736, initial)>
running thread  <Thread(Thread-2737, initial)>
running thread  <Thread(Thread-2738, initial)>
running thread  <Thread(Thread-2739, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.29it/s]


current loss: [[1.73500139e+76]]
running thread  <Thread(Thread-2740, initial)>
running thread  <Thread(Thread-2741, initial)>
running thread  <Thread(Thread-2742, initial)>
running thread  <Thread(Thread-2743, initial)>
running thread  <Thread(Thread-2744, initial)>
running thread  <Thread(Thread-2745, initial)>
running thread  <Thread(Thread-2746, initial)>
running thread  <Thread(Thread-2747, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.17it/s]


current loss: [[3.33500175e+194]]
running thread  <Thread(Thread-2748, initial)>
running thread  <Thread(Thread-2749, initial)>
running thread  <Thread(Thread-2750, initial)>
running thread  <Thread(Thread-2751, initial)>
running thread  <Thread(Thread-2752, initial)>
running thread  <Thread(Thread-2753, initial)>
running thread  <Thread(Thread-2754, initial)>
running thread  <Thread(Thread-2755, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 16.62it/s]


current loss: [[4.2311124e+145]]
running thread  <Thread(Thread-2756, initial)>
running thread  <Thread(Thread-2757, initial)>
running thread  <Thread(Thread-2758, initial)>
running thread  <Thread(Thread-2759, initial)>
running thread  <Thread(Thread-2760, initial)>
running thread  <Thread(Thread-2761, initial)>
running thread  <Thread(Thread-2762, initial)>
running thread  <Thread(Thread-2763, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:03<00:00, 12.40it/s]


current loss: [[2.75771984e+123]]
running thread  <Thread(Thread-2764, initial)>
running thread  <Thread(Thread-2765, initial)>
running thread  <Thread(Thread-2766, initial)>
running thread  <Thread(Thread-2767, initial)>
running thread  <Thread(Thread-2768, initial)>
running thread  <Thread(Thread-2769, initial)>
running thread  <Thread(Thread-2770, initial)>
running thread  <Thread(Thread-2771, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 40/40 [00:02<00:00, 13.44it/s]

current loss: [[nan]]
5319271672313898.0
tensor([-1.0303,  0.3383,  1.0145])
tensor([5.3257e+15, 5.3257e+15, 5.3257e+15, 5.3257e+15, 5.3257e+15, 5.3257e+15,
        5.3257e+15, 5.3257e+15, 5.3257e+15, 5.3257e+15, 5.3257e+15, 5.3257e+15,
        5.3257e+15, 5.3257e+15, 5.3257e+15, 5.3257e+15, 5.3257e+15, 5.3193e+15,
        5.3257e+15, 5.3257e+15])
tensor([[ 1.0000e-04,  1.0000e-04,  1.0000e-04, -1.3602e+00,  6.2964e-01,
          8.4174e-01, -2.5236e-01,  6.0525e-01,  1.7894e+00,  6.1155e-02,
          8.5220e-01, -1.7842e+00, -7.3041e-01,  1.1160e+00,  3.7798e-01,
         -2.0286e+00, -5.3640e-01, -1.0303e+00, -5.8221e-02, -4.2254e-01],
        [ 1.0000e-03,  1.0000e-03,  1.0000e-03,  6.7530e-01,  5.9039e-01,
          5.6459e-01,  1.2596e+00,  6.7088e-01,  1.4162e+00,  9.2197e-01,
          3.4475e-01,  7.6778e-01,  1.6476e+00,  2.6488e+00,  1.0802e+00,
          1.9088e+00,  9.4263e-01,  3.3829e-01,  5.0835e-01,  2.4066e-01],
        [ 9.0000e-01,  9.0000e-01,  9.0000e-01,  1.3761e




In [43]:
# customized hyperparameter tryout
W1, W2 =\
nn_sgd_mss_with_momentum_threaded(Xs=Xs_tr, Ys=Ys_tr, gamma=0.0001, W10=W10, W20=W20, alpha=0.001, beta=0.9, B=8, num_epochs=200, num_threads=8)

running thread  <Thread(Thread-2772, initial)>
running thread  <Thread(Thread-2773, initial)>
running thread  <Thread(Thread-2774, initial)>
running thread  <Thread(Thread-2775, initial)>
running thread  <Thread(Thread-2776, initial)>
running thread  <Thread(Thread-2777, initial)>
running thread  <Thread(Thread-2778, initial)>
running thread  <Thread(Thread-2779, initial)>
Running minibatch sequential-scan SGD with momentum (8 threads)


100%|██████████| 200/200 [00:12<00:00, 16.16it/s]

current loss: [[49.42656076]]





In [48]:
W1 @ np.array([Receipt_Count[j] for j in range(364, 364 - day_range, -1)], dtype=float).reshape(30,)

array([-90983.71200771, -90983.71200771, -90983.71200771, -90983.71200771,
       -90983.71200771])

In [49]:
# use the model to predict the approximate number of the scanned receipts for each day of 2022
q = deque()
warm_up = [Receipt_Count[j] for j in range(364, 364 - day_range, -1)]
for count in warm_up:
    q.append(count)
predictions = []
for _ in range(365):
    X = np.array(q, dtype=float).reshape(30,)
    predictions.append(((W2 @ sigmoid(W1 @ (X - mean) / std)) * std + mean).item())
    q.popleft()
    q.append(predictions[-1])
# calculate the predicted number of the scanned receipts for each month of 2022
df = pandas.DataFrame()
df.index = pandas.date_range(start='1/1/2022', end='12/31/2022')
df["count"] = predictions
monthSum = df.groupby(df.index.month).sum()

In [50]:
df # predicted data

Unnamed: 0,count
2022-01-01,8.871687e+06
2022-01-02,8.873233e+06
2022-01-03,8.872166e+06
2022-01-04,8.871940e+06
2022-01-05,8.872501e+06
...,...
2022-12-27,8.872248e+06
2022-12-28,8.872248e+06
2022-12-29,8.872248e+06
2022-12-30,8.872248e+06
