In [2]:
data_path = 'mnist_data_handwritten'

In [1]:

import numpy as np

# Define two 3D arrays
a = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
b = np.array([[[1, 3], [5, 7]], [[2, 4], [6, 8]]])

# Compute tensor dot product
result = np.tensordot(a, b, axes=([1, 0], [0, 1]))

print(result)

[[ 74 106]
 [ 88 128]]


### Data Process

In [3]:
import matplotlib.pyplot as plt
import numpy as np
import random

def display_random_images(images, num_images=5):
    plt.figure(figsize=(10, 10))
    for i in range(num_images):

        idx = random.randint(0, len(images) - 1)
        img = images[idx]

        plt.subplot(1, num_images, i + 1)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        plt.imshow(img, cmap=plt.cm.binary)
        plt.xlabel(i)

    plt.show()




In [4]:
import struct
import os

def read_idx(filename):
    with open(filename, 'rb') as f:
        zero, data_type, dims = struct.unpack('>HBB', f.read(4))
        shape = tuple(struct.unpack('>I', f.read(4))[0] for d in range(dims))
        return np.frombuffer(f.read(), dtype=np.uint8).reshape(shape)
    


def load_mnist(path, kind='train'):
    labels_path = os.path.join(path, f'{kind}-labels.idx1-ubyte')
    images_path = os.path.join(path, f'{kind}-images.idx3-ubyte')

    labels = read_idx(labels_path)
    images = read_idx(images_path)


    return images, labels

def data_preprocess(x,y):
    x[x<=40]=0
    x[x>40] =1
    return x,y

def normalize(image):
    image -= image.min()
    image = image / image.max()
    image = image * 1.275 - 0.1
    return image

In [5]:
# 使用函数加载数据
train_images, train_labels = load_mnist(data_path, kind='train')
test_images, test_labels = load_mnist(data_path, kind='t10k')
#display_random_images(train_images)

print(train_images.shape)
print(train_labels.shape)


(60000, 28, 28)
(60000,)


### Predict

In [6]:
import numpy as np


def predict(test_images, model):
    scores = model.output(test_images)
    preds = np.argmax(scores, axis=1)
    return preds

def cal_accuracy(y_pred, y):
    # TODO: Compute the accuracy among the test set and store it in acc

    rs= 0
    for _ in range(len(y)):
        
        if y_pred[_] == y[_]:
            rs+=1
    return rs/len(y)

def get_acc(x,y,model):
    preds = predict(x,model)
    return cal_accuracy(preds,y)

### Softmax & cross-entropy

In [7]:
def softmax(x):
    # x [6000,10]

    x_exp = np.exp(x)

    return x_exp / (np.sum(x_exp,axis=1,keepdims=True))
    
def loss(y_hat,y):
    if len(y.shape) >1 :
        y = np.squeeze(y,axis=1)
    #y_hat [6000.10] y[60000]
    p = np.log(y_hat[list(range(len(y))),y]).mean()
    return -p
def cross_entropy(y_hat,y):
    if len(y.shape) >1 :
        y = np.squeeze(y,axis=1)
    #y_hat [6000.10] y[60000]
    p = np.log(y_hat[list(range(len(y))),y]).mean()
    return -p

def batch_generator(x,y,batch_size):
    num = len(x)
    for st in range(0,num,batch_size):
        ed = min(st+batch_size,num)
        yield x[st:ed], y[st:ed]

def plpot(loss_values,acc_values):
    import matplotlib.pyplot as plt
    # 创建新的图表窗口
    plt.figure()

    # 绘制 loss 曲线
    plt.subplot(2, 1, 1)  # 创建一个 2x1 的图表网格，并选择第一个子图
    plt.plot(loss_values, label='Loss')
    plt.title('Loss over time')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    # 绘制 accuracy 曲线
    plt.subplot(2, 1, 2)  # 创建一个 2x1 的图表网格，并选择第二个子图
    plt.plot(acc_values, label='Accuracy', color='orange')
    plt.title('Accuracy over time')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

    
    # 显示图表
    plt.tight_layout()  # 自动调整子图参数，以保证图表不会重叠
    plt.show()
    #plt.savefig('theta_1000_0.05.png')


def get_gradient(x,y,y_hat,theta):
    reg_rate = 0.01
    l = loss(y_hat, y) +  reg_rate * np.sum(theta*theta)
    y_hat[list(range(len(y))),y]-=1
    gradient = (y_hat.T) @ x 

    return gradient/len(x) ,l

### 多层感知器


In [20]:
import numpy as np

class Block:

    def __init__(self, name="Block") :
        self.name = name
        self.reg_rate = 0.05


    def forwards(self,)->None:
        pass
    def backwards(self,)->None:
        pass
    def test_dims(self,)->None:
        pass


class MLP(Block):
    def __init__(self, lname,dims, lr=0.001,bia=True):
        super().__init__(name="MLP-"+lname)
        self.lr = lr
        self.w = np.random.randn(*dims)
        if bia:
            self.bia = np.zeros(dims[-1])

    
    #forwards:
    #input: [256, 120]  w: [120, 84]
    #return [256, 84]
    def forwards(self,x):
        self.input = x
        output = x @ self.w 
        if hasattr(self, 'bia'):
            output += self.bia  
        return output
    
    def backwards(self, dA):

        #dA [256, 84]  dw[120, 84] = [256,120].T @ [256,84] 
        dW = self.input.T @ dA
        dA = dA @ self.w.T

        if hasattr(self, 'bia'):
            db = np.sum(dA, axis=1).mean()

        self.w -= self.lr * dW
        if hasattr(self, 'bia'):
            self.bia -= self.lr * db

        return dA

    def test(self, X):
        self.forwards(X)
        self.backwards(X)
        print()
        


class Sigmoid(Block):
    def __init__(self):
        super().__init__(name="Sigmoid")

    # Sigmoid函数
    #input[256,1,n,n]
    # return [256,1,n,n]
    def forwards(self, x):
        self.input = x
        output = 1 / (1 + np.exp(-x))
         
        return output
    
    def backwards(self, dout):
        sigmoid_derivative = self.input * (1 - self.input)  
        dA = dout * sigmoid_derivative 
        return dA

    def test(self):
        
        pass

class Flatten(Block):
    def __init__(self):
        super().__init__(name="Flatten")

    def forwards(self, x):
        self.input_shape = x.shape  
        output = x.reshape(x.shape[0], -1)
        return output
    
    def backwards(self, dout):
        dA = dout.reshape(self.input_shape) 
        return dA

    
    def test(self):

        pass


class SoftMax(Block):
    def __init__(self):
        super().__init__(name="Softmax")


    def forwards(self, x):
        self.input = x
        self.output = softmax(x)
        return self.output
    
    def backwards(self,y):
        # 默认交叉熵
        dA = self.output
        dA[list(range(len(y))),y]-=1
        #dA = (self.output.T) @ self.input 
        return dA 
    


class Model():
    def __init__(self,name,model_list):
        self.name = name
        self.model_list  = model_list
        

    def output(self,x):
        for layer in self.model_list:
            x = layer.forwards(x)
        return x

    def backwards(self, y):
        dA = y
        for layer in self.model_list.__reversed__():
                dA = layer.backwards(dA)
    
    
        
        



In [21]:
def zero_padding(X, pad):
    X_pad = np.pad(X, ((0, 0), (pad, pad), (pad, pad), (0, 0)), 'constant', constant_values=(0, 0))
    return X_pad


In [30]:
class ConvolutionalLayer(Block):
    def __init__(self, lname,in_channels, out_channels, kernel_size, stride=1, padding=0, lr=0.005):
        super().__init__(name="ConvLayer"+lname)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.lr = lr

        # 初始化权重和偏置
        self.w = np.random.randn(kernel_size, kernel_size, in_channels, out_channels)
        self.bias = np.zeros(out_channels)


    def _simple_conv(self, x):
        self.input = x

        batch, in_channel, in_h, in_w = x.shape
        out_h = (in_h + 2*self.padding - self.kernel_size) // self.stride + 1
        out_w = (in_w + 2*self.padding - self.kernel_size) // self.stride + 1
        assert(out_h == out_w and in_channel == self.in_channels)
  
        if self.padding > 0:
            X_padding = zero_padding(self.input,self.padding)
            #input_padding[:, :, self.padding:-self.padding, self.padding:-self.padding]=self.input
        else:
            X_padding = self.input

        output = np.zeros((batch, self.out_channels, out_h, out_w))

        for i in range(batch):
            for channel in range(self.out_channels):
                for h in range(out_h):
                    for w in range(out_w):
                        h_start = h * self.stride
                        h_end = h_start + self.kernel_size
                        w_start = w * self.stride
                        w_end = w_start + self.kernel_size

                        #window = x[i, :, h_start:h_end, w_start:w_end]
                        #output[i, channel, h, w] = np.sum(window * self.weights[channel]) + self.bias[channel]   
                        conv_sum = 0
                        # 对input map 的所有channel 同一位置卷积结果平均
                        for m in range(in_channel): 
                 
                            window = X_padding[i, m, h_start:h_end, w_start:w_end]
                            
                            conv_sum += np.sum(window * self.w[channel, m])

                        output[i, channel, h, w] = conv_sum + self.bias[channel]
          
        return output

    def _optimized_forward(self,x):
        self.input = x
        (m, n_H_prev, n_W_prev, n_C_prev) = x.shape
        (f, f, n_C_prev, n_C) = self.w.shape

        stride, pad = self.stride, self.padding

        n_H = int((n_H_prev + 2 * pad - f) / stride + 1)
        n_W = int((n_W_prev + 2 * pad - f) / stride + 1)

        # Initialize the output volume Z with zeros.
        Z = np.zeros((m, n_H, n_W, n_C))
        A_prev_pad = zero_padding(x, pad)
        for h in range(n_H):
            for w in range(n_W):
                # Use the corners to define the (3D) slice of a_prev_pad.
                A_slice_prev = A_prev_pad[:, h * stride:h * stride + f, w * stride:w * stride + f, :]
                # Convolve the (3D) slice with the correct filter W and bias b, to get back one output neuron.
                Z[:, h, w, :] = np.tensordot(A_slice_prev, self.w, axes=([1, 2, 3], [0, 1, 2])) + self.bias

        assert (Z.shape == (m, n_H, n_W, n_C))
        #cache = (A_prev, W, b, hyper_parameters)
        return Z


    def forwards(self, x):
       return self._optimized_forward(x)

    def _simple_bac(self, dout):
        batch_size, _, in_height, in_width = self.input.shape
        _, _, out_height, out_width = dout.shape

        dW = np.zeros(self.w.shape)
        dX_padded = np.zeros((batch_size, self.in_channels, in_height + 2 * self.padding, in_width + 2 * self.padding))
        db = np.zeros(self.bias.shape)

        
        if self.padding > 0:
            dX_padded = zero_padding(dX_padded,self.padding)
            X_padded = zero_padding(self.input,self.padding)
            #dX_padded[:, :, self.padding:-self.padding, self.padding:-self.padding] = self.input
        else:
            X_padded = self.input
        
        for i in range(batch_size): 
            for j in range(self.out_channels): 
                for k in range(self.in_channels):  
                    for m in range(out_height): 
                        for n in range(out_width):  
                            h_start,w_start = m * self.stride,n * self.stride
                            window = X_padded[i, k, h_start:h_start + self.kernel_size, w_start:w_start + self.kernel_size]

                            # 更新权重梯度
                            dW[j, k] += window * dout[i, j, m, n]

                            # 更新输入梯度
                            dX_padded[i, k, h_start:h_start + self.kernel_size, w_start:w_start + self.kernel_size] += \
                                self.w[j, k] * dout[i, j, m, n]

        for j in range(self.out_channels): 
            db[j] = np.sum(dout[:, j, :, :])

        
        if self.padding > 0:
            dX = dX_padded[:, :, self.padding:-self.padding, self.padding:-self.padding]
        else:
            dX = dX_padded

        self.update(dW, db)
        return dX

    def _optimized_bac(self,dout):
        A_prev, W, b = self.input,self.w, self.bias
        m, n_H_prev, n_W_prev, n_C_prev = A_prev.shape
        f, f, n_C_prev, n_C = W.shape
        m, n_H, n_W, n_C = dout.shape
        stride,pad = self.stride, self.padding

        dA_prev = np.zeros((m, n_H_prev, n_W_prev, n_C_prev))
        dW = np.zeros((f, f, n_C_prev, n_C))
        db = np.zeros((1, 1, 1, n_C))

        if pad != 0:
            A_prev_pad = zero_padding(A_prev, pad)
            dA_prev_pad = zero_padding(dA_prev, pad)
        else:
            A_prev_pad = A_prev
            dA_prev_pad = dA_prev

        for h in range(n_H):
            for w in range(n_W):
                # Find the corners of the current "slice"
                vert_start, horiz_start = h * stride, w * stride
                vert_end, horiz_end = vert_start + f, horiz_start + f

                # Use the corners to define the slice from a_prev_pad
                A_slice = A_prev_pad[:, vert_start:vert_end, horiz_start:horiz_end, :]

                # Update gradients for the window and the filter's parameters
                dA_prev_pad[:, vert_start:vert_end, horiz_start:horiz_end, :] += np.transpose(np.dot(W, dout[:, h, w, :].T), (3, 0, 1, 2))

                dW += np.dot(np.transpose(A_slice, (1, 2, 3, 0)), dout[:, h, w, :])
                db += np.sum(dout[:, h, w, :], axis=0)

        # Set dA_prev to the unpadded dA_prev_pad
        dA_prev = dA_prev_pad if pad == 0 else dA_prev_pad[:, pad:-pad, pad:-pad, :]

        # Making sure your output shape is correct
        assert (dA_prev.shape == (m, n_H_prev, n_W_prev, n_C_prev))
 
        return dA_prev

    # dout[256 16 10 10]  -反卷积>  dw[16 5 5]   input [256 6 14 14]     
    def backwards(self, dout):
        
        return self._optimized_bac(dout)

    def update(self,dW, db,  momentum=0):
        vw_u = - self.lr * dW
        vb_u = - self.lr * db
        self.w += vw_u
        self.bias += vb_u
        return

class PoolingLayer(Block):
    def __init__(self, pool_size, stride, mode='average'):
        super().__init__(name="PoolingLayer")
        self.pool_size = pool_size
        self.stride = stride
        self.mode = mode

    def _simple_for(self, x):
        batch_size, channels, height, width = x.shape
        out_height = (height) // self.stride 
        out_width = (width ) // self.stride 

        output = np.zeros((batch_size, channels, out_height, out_width))

        for i in range(batch_size):
            for j in range(channels):
                for k in range(out_height):
                    for l in range(out_width):
                        h_start = k * self.stride
                        w_start = l * self.stride
                        window = x[i, j, h_start:h_start + self.pool_size, w_start:w_start + self.pool_size]

                        if self.mode == 'max':
                            output[i, j, k, l] = np.max(window)
                        elif self.mode == 'average':
                            output[i, j, k, l] = np.mean(window)

        return output
    
    def _optimized_for(self, x):
        m, n_H_prev, n_W_prev, n_C_prev = x.shape
        f,stride = self.pool_size, self.stride
       
        n_H = int(1 + (n_H_prev - f) / stride)
        n_W = int(1 + (n_W_prev - f) / stride)
        n_C = n_C_prev

        A = np.zeros((m, n_H, n_W, n_C))
        for h in range(n_H):
            for w in range(n_W):
                # Use the corners to define the current slice on the ith training example of A_prev, channel c
                A_prev_slice = x[:, h * stride:h * stride + f, w * stride:w * stride + f, :]
                # Compute the pooling operation on the slice. Use an if statement to differentiate the modes.
                if self.mode == "max":
                    A[:, h, w, :] = np.max(A_prev_slice, axis=(1, 2))
                elif self.mode == "average":
                    A[:, h, w, :] = np.average(A_prev_slice, axis=(1, 2))

        # cache = (A_prev, hyper_parameters)
        # assert (A.shape == (m, n_H, n_W, n_C))
        return A

    def forwards(self, x):
        self.input = x
        return self._optimized_for(x)


    def _simple_bac(self, dout):
        batch_size, channels, out_height, out_width = dout.shape
        #out_height, out_width = dout.shape[2], dout.shape[3]
        
        dX = np.zeros_like(self.input)

        for i in range(batch_size):
            for j in range(channels):
                for k in range(out_height):
                    for l in range(out_width):
                        h_start = k * self.stride
                        w_start = l * self.stride
                        window = self.input[i, j, h_start:h_start + self.pool_size, w_start:w_start + self.pool_size]

                        if self.mode == 'max':
                            max_val = np.max(window)
                            mask = (window == max_val)
                            dX[i, j, h_start:h_start + self.pool_size, w_start:w_start + self.pool_size] += mask * dout[i, j, k, l]
                        elif self.mode == 'average':
                            average_val = dout[i, j, k, l] / (self.pool_size * self.pool_size)
                            dX[i, j, h_start:h_start + self.pool_size, w_start:w_start + self.pool_size] += np.ones_like(window) * average_val

        return dX

    def _optimized_bac(self, dout):
        A_prev=self.input

        stride, f = self.stride, self.pool_size

        m, n_H_prev, n_W_prev, n_C_prev = A_prev.shape  # 256,28,28,6
        m, n_H, n_W, n_C = dout.shape  # 256,14,14,6

        dA_prev = np.zeros((m, n_H_prev, n_W_prev, n_C_prev))  # 256,28,28,6

        for h in range(n_H):
            for w in range(n_W):
                # Find the corners of the current "slice"
                vert_start, horiz_start = h * stride, w * stride
                vert_end, horiz_end = vert_start + f, horiz_start + f

                # Compute the backward propagation in both modes.
                if self.mode == "max":
                    A_prev_slice = A_prev[:, vert_start: vert_end, horiz_start: horiz_end, :]
                    A_prev_slice = np.transpose(A_prev_slice, (1, 2, 3, 0))
                    mask = A_prev_slice == A_prev_slice.max((0, 1))
                    mask = np.transpose(mask, (3, 2, 0, 1))
                    dA_prev[:, vert_start: vert_end, horiz_start: horiz_end, :] \
                        += np.transpose(np.multiply(dout[:, h, w, :][:, :, np.newaxis, np.newaxis], mask), (0, 2, 3, 1))

                elif self.mode == "average":
                    da = dout[:, h, w, :][:, np.newaxis, np.newaxis, :]  # 256*1*1*6
                    dA_prev[:, vert_start: vert_end, horiz_start: horiz_end, :] += np.repeat(np.repeat(da, 2, axis=1), 2, axis=2) / f / f

        assert (dA_prev.shape == A_prev.shape)
        return dA_prev
    def backwards(self, dout):
        return self._optimized_bac(dout)
        





#### test

In [10]:
a = np.ones((256,16,5,5))
layers = [Flatten(), MLP("0", (16*25,120)),Sigmoid(),MLP("1",(120,84)),Sigmoid(),MLP("2",(84,10))]

shap = (8,16,5,5)
x = np.random.randn(*shap)

for l in layers:
    x = l.forwards(x)
    print(l.name + " " , list(x.shape)) 
    
print('backwards')
dA = x
for l in layers.__reversed__():
    dA = l.backwards(dA)
    print(l.name + " " , list(dA.shape)) 


Flatten  [8, 400]
MLP-0  [8, 120]
Sigmoid  [8, 120]
MLP-1  [8, 84]
Sigmoid  [8, 84]
MLP-2  [8, 10]
backwards
MLP-2  [8, 84]
Sigmoid  [8, 84]
MLP-1  [8, 120]
Sigmoid  [8, 120]
MLP-0  [8, 400]
Flatten  [8, 16, 5, 5]


In [11]:
def softmax_regression(model, x, y, iters, batch_size=1):

    if len(y.shape)>1:
        y=np.squeeze(y)
    #batch_size = 600
    loss_ls = []
    acc_train = []
    acc_test = []

    for epoch in range(iters):
        loss_sum = 0
        genertor = batch_generator(x, y, batch_size)
        for i,(x_,y_) in enumerate(genertor):
            #print(f"epoch:{i}:")
            y_hat = model.output(x_)
            l = cross_entropy(y_hat,y_)
            loss_sum = loss_sum+l
            model.backwards(y_)
            
            if not (((i+1)*batch_size))%1000:
                loss_ls.append(l)
                #acc_train.append(get_acc(x, y, model))
                #print(f"Loss:{l}")
        print(f"Epoch{epoch}\n Loss:{loss_sum/(60000)},acc:{get_acc(x,y,model)}")

    plpot(loss_ls,acc_train)
    return 

In [1]:
layers = [Flatten(), MLP("0", (28*28,10)),SoftMax()]
model = Model("softmax", layers)


x,y = data_preprocess(train_images.copy(),train_labels.copy())

#iter = batch_generator(train_images,train_labels,batch_size=256)
softmax_regression(model,x,y,30,batch_size=600)
ty,tyl = data_preprocess(test_images.copy(),test_labels.copy())
get_acc(ty,tyl ,model)


layers =  [GFlatten(). <:P)""A,(-28,2ii,2),SffaMax()]

SyntaxError: invalid syntax (2583132500.py, line 13)

In [13]:
ty,tyl = data_preprocess(test_images.copy(),test_labels.copy())

get_acc(ty,tyl ,model)
layers = [latten(),MLP("0", (29,202,10)), softmax()]

model = Model(s"saaq", layers)



0.8901

### 卷积

In [12]:
def train(model, x, y, iters, batch_size=128):

    #batch_size = 600
    loss_ls = []
    acc_train = []
    acc_test = []

    #print(f"Epoch{-1}\n acc:{get_acc(x,y,model)}")
    from tqdm import trange
    for epoch in trange(iters):
        loss_sum = 0
        genertor = batch_generator(x, y, batch_size)
        for i,(x_,y_) in enumerate(genertor):
            #print(f"epoch:{i}:")
            y_hat = model.output(x_)
            l = cross_entropy(y_hat,y_)
            loss_sum = loss_sum+l
            model.backwards(y_)
            
            if not (((i+1)*batch_size))%1000:
                loss_ls.append(l)
                #acc_train.append(get_acc(x, y, model))
                #print(f"Loss:{l}")
        print(f"Epoch{epoch}\n Loss:{loss_sum/(60000)},acc:{get_acc(x,y,model)}")

    plpot(loss_ls,acc_train)
    return 

In [32]:
alpha = 0.0005

layers = [ConvolutionalLayer("1->6",1,6,5,stride=1,padding=2,lr=alpha),
          Sigmoid(),
          PoolingLayer(2,2,mode='average'),
          ConvolutionalLayer("6->16",6,16,5,stride=1,padding=0,lr=alpha),
          Sigmoid(),
          PoolingLayer(2,2,mode='average'),
          Flatten(),
          MLP("400->120",(400,120),lr=alpha),
          Sigmoid(),
          MLP("120->84",(120,84),lr=alpha),
          Sigmoid(),
          MLP("84->10",(84,10),lr=alpha),
          SoftMax()
         ]

LeNet5 = Model(name="Fond-LeNet",model_list=layers)

train_images, train_labels = load_mnist(data_path, kind='train')
test_images, test_labels = load_mnist(data_path, kind='t10k')

x,y = (normalize(train_images.copy()[:60]),  train_labels[:60])
x = np.expand_dims(x, axis=-1)
#iter = batch_generator(train_images,train_labels,batch_size=256)
train(LeNet5,x,y,iters=30,batch_size=6)
ty,tyl = data_preprocess(test_images.copy(),test_labels.copy())
print("Test Acc",get_acc(ty,tyl ,model))


  0%|          | 0/30 [00:00<?, ?it/s]

  output = 1 / (1 + np.exp(-x))
  dA = dA @ self.w.T
  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  3%|▎         | 1/30 [00:00<00:11,  2.44it/s]

Epoch0
 Loss:nan,acc:0.1


  7%|▋         | 2/30 [00:00<00:11,  2.49it/s]

Epoch1
 Loss:nan,acc:0.1


 10%|█         | 3/30 [00:01<00:10,  2.50it/s]

Epoch2
 Loss:nan,acc:0.1


 13%|█▎        | 4/30 [00:01<00:10,  2.50it/s]

Epoch3
 Loss:nan,acc:0.1


 17%|█▋        | 5/30 [00:02<00:09,  2.50it/s]

Epoch4
 Loss:nan,acc:0.1


 20%|██        | 6/30 [00:02<00:09,  2.49it/s]

Epoch5
 Loss:nan,acc:0.1


 23%|██▎       | 7/30 [00:02<00:09,  2.49it/s]

Epoch6
 Loss:nan,acc:0.1


 27%|██▋       | 8/30 [00:03<00:08,  2.51it/s]

Epoch7
 Loss:nan,acc:0.1


 30%|███       | 9/30 [00:03<00:08,  2.50it/s]

Epoch8
 Loss:nan,acc:0.1


 33%|███▎      | 10/30 [00:04<00:08,  2.49it/s]

Epoch9
 Loss:nan,acc:0.1


 37%|███▋      | 11/30 [00:04<00:07,  2.51it/s]

Epoch10
 Loss:nan,acc:0.1


 40%|████      | 12/30 [00:04<00:07,  2.50it/s]

Epoch11
 Loss:nan,acc:0.1


 43%|████▎     | 13/30 [00:05<00:06,  2.52it/s]

Epoch12
 Loss:nan,acc:0.1


 47%|████▋     | 14/30 [00:05<00:06,  2.51it/s]

Epoch13
 Loss:nan,acc:0.1


 50%|█████     | 15/30 [00:05<00:05,  2.52it/s]

Epoch14
 Loss:nan,acc:0.1


 53%|█████▎    | 16/30 [00:06<00:05,  2.51it/s]

Epoch15
 Loss:nan,acc:0.1


 57%|█████▋    | 17/30 [00:06<00:05,  2.50it/s]

Epoch16
 Loss:nan,acc:0.1


 60%|██████    | 18/30 [00:07<00:04,  2.51it/s]

Epoch17
 Loss:nan,acc:0.1


 63%|██████▎   | 19/30 [00:07<00:04,  2.46it/s]

Epoch18
 Loss:nan,acc:0.1


 67%|██████▋   | 20/30 [00:08<00:04,  2.49it/s]

Epoch19
 Loss:nan,acc:0.1


 70%|███████   | 21/30 [00:08<00:03,  2.50it/s]

Epoch20
 Loss:nan,acc:0.1


 73%|███████▎  | 22/30 [00:08<00:03,  2.50it/s]

Epoch21
 Loss:nan,acc:0.1


 77%|███████▋  | 23/30 [00:09<00:02,  2.51it/s]

Epoch22
 Loss:nan,acc:0.1


 80%|████████  | 24/30 [00:09<00:02,  2.51it/s]

Epoch23
 Loss:nan,acc:0.1


 83%|████████▎ | 25/30 [00:09<00:01,  2.50it/s]

Epoch24
 Loss:nan,acc:0.1


 87%|████████▋ | 26/30 [00:10<00:01,  2.50it/s]

Epoch25
 Loss:nan,acc:0.1


 90%|█████████ | 27/30 [00:10<00:01,  2.46it/s]

Epoch26
 Loss:nan,acc:0.1





KeyboardInterrupt: 