In [9]:
import torch
import numpy as np
import time
import torch.nn as nn
from torchvision import transforms
import matplotlib.pyplot as plt
from planar_utils import plot_decision_boundary, sigmoid, load_planar_dataset, load_extra_datasets
%matplotlib inline

# Load the planar dataset and wrap both returned arrays in torch tensors
# (features first, labels second), using the same legacy constructor for each.
X, Y = (torch.Tensor(array) for array in load_planar_dataset())

def layer_sizes(X, Y):
    """Return the input and output layer widths implied by the data.

    Only ``.shape`` is read from either argument; the first dimension of each
    is used as the corresponding layer width.

    Args:
        X: Input data; ``X.shape[0]`` is taken as the input layer size.
        Y: Labels; ``Y.shape[0]`` is taken as the output layer size.

    Returns:
        A ``(n_x, n_y)`` tuple.
    """
    # size of input layer
    n_x = X.shape[0]

    # size of output layer
    n_y = Y.shape[0]

    return (n_x, n_y)


class Net(nn.Module):
    """Two-layer network: tanh hidden layer followed by a sigmoid output.

    Weights are stored as explicit ``nn.Parameter`` matrices and applied as
    ``W @ X + b``, so inputs are laid out with one example per column.
    """

    def __init__(self, n_h, X, Y):
        """Create the parameters, sizing the layers from ``X`` and ``Y``.

        Args:
            n_h: Number of hidden units.
            X: Sample input, used only to derive the input width.
            Y: Sample labels, used only to derive the output width.
        """
        super().__init__()

        # layer_sizes returns exactly (n_x, n_y). Indexing element [2], as
        # this constructor used to, raised IndexError before any parameter
        # was created.
        n_x, n_y = layer_sizes(X, Y)

        # Seeding NumPy's global RNG keeps the initial weights reproducible
        # from run to run (it also resets the global random state).
        np.random.seed(1)
        W1 = np.random.randn(n_h, n_x) * 0.01
        b1 = np.zeros((n_h, 1))
        W2 = np.random.randn(n_y, n_h) * 0.01
        b2 = np.zeros((n_y, 1))

        # NumPy yields float64; store float32 parameters. nn.Parameter
        # already defaults to requires_grad=True.
        self.W1 = nn.Parameter(torch.tensor(W1, dtype=torch.float32))
        self.b1 = nn.Parameter(torch.tensor(b1, dtype=torch.float32))

        self.W2 = nn.Parameter(torch.tensor(W2, dtype=torch.float32))
        self.b2 = nn.Parameter(torch.tensor(b2, dtype=torch.float32))

    def forward(self, X):
        """Return the sigmoid activations for ``X``.

        Args:
            X: Array or tensor whose first dimension matches the columns of
                ``W1``; it is converted to the parameters' dtype if needed.

        Returns:
            Tensor with ``W2.shape[0]`` rows and one column per column of
            ``X``, with values in (0, 1).
        """
        # Accept NumPy arrays as well as tensors without the legacy
        # torch.Tensor constructor.
        X = torch.as_tensor(X, dtype=self.W1.dtype)
        # torch.mm is a matrix product; b1 has a single column and is
        # broadcast across the columns of the product.
        H = torch.tanh(torch.mm(self.W1, X) + self.b1)
        Z = torch.mm(self.W2, H) + self.b2
        return torch.sigmoid(Z)
    
# Full-batch training of a 4-hidden-unit network with binary cross-entropy.
net = Net(4, X, Y)
criterion = nn.BCELoss()

num_iterations, lr = 5000, 1.2
optimizer = torch.optim.SGD(net.parameters(), lr=lr)
# optimizer = torch.optim.Adam(net.parameters(), lr=0.01, weight_decay=1e-4)

# Loop invariants: the accuracy denominator (X.shape[1], the dimension the
# original code divided by) and the training-mode switch.
n = X.shape[1]
net.train()

for iteration in range(num_iterations):
    st = time.time()

    optimizer.zero_grad()  # clear gradients left over from the previous step

    y_hat = net(X)
    loss = criterion(y_hat, Y)
    loss.backward()
    optimizer.step()

    # Accuracy of the predictions made *before* this step's update.
    # Cast the boolean threshold result to Y's dtype so the comparison does
    # not rely on implicit bool/float promotion, and convert the count to
    # float before dividing: dividing an integer tensor by an int is floor
    # division on older PyTorch releases, which would report 0 accuracy.
    predictions = (y_hat > 0.5).to(Y.dtype)
    train_acc_sum = (predictions == Y).sum()
    train_acc = train_acc_sum.float() / n

    # .item() hands plain Python numbers to the format string instead of
    # relying on implicit tensor-to-float conversion.
    print('iteration %d, loss %.4f, train acc %.3f, time %.2f ' % (
        iteration + 1, loss.item(), train_acc.item(), time.time() - st))


iteration 1, loss 0.6931, train acc 0.668, time 0.00 
iteration 2, loss 0.6930, train acc 0.460, time 0.00 
iteration 3, loss 0.6930, train acc 0.475, time 0.00 
iteration 4, loss 0.6928, train acc 0.490, time 0.00 
iteration 5, loss 0.6927, train acc 0.498, time 0.00 
iteration 6, loss 0.6924, train acc 0.498, time 0.00 
iteration 7, loss 0.6921, train acc 0.498, time 0.00 
iteration 8, loss 0.6916, train acc 0.498, time 0.00 
iteration 9, loss 0.6909, train acc 0.498, time 0.00 
iteration 10, loss 0.6900, train acc 0.498, time 0.00 
iteration 11, loss 0.6889, train acc 0.498, time 0.00 
iteration 12, loss 0.6876, train acc 0.498, time 0.00 
iteration 13, loss 0.6862, train acc 0.502, time 0.00 
iteration 14, loss 0.6847, train acc 0.505, time 0.00 
iteration 15, loss 0.6832, train acc 0.515, time 0.00 
iteration 16, loss 0.6818, train acc 0.530, time 0.00 
iteration 17, loss 0.6804, train acc 0.545, time 0.00 
iteration 18, loss 0.6791, train acc 0.545, time 0.00 
iteration 19, loss 

iteration 276, loss 0.3153, train acc 0.877, time 0.00 
iteration 277, loss 0.3152, train acc 0.877, time 0.00 
iteration 278, loss 0.3150, train acc 0.877, time 0.00 
iteration 279, loss 0.3149, train acc 0.877, time 0.00 
iteration 280, loss 0.3148, train acc 0.877, time 0.00 
iteration 281, loss 0.3147, train acc 0.877, time 0.00 
iteration 282, loss 0.3145, train acc 0.877, time 0.00 
iteration 283, loss 0.3144, train acc 0.877, time 0.00 
iteration 284, loss 0.3143, train acc 0.877, time 0.00 
iteration 285, loss 0.3141, train acc 0.877, time 0.00 
iteration 286, loss 0.3140, train acc 0.877, time 0.00 
iteration 287, loss 0.3139, train acc 0.877, time 0.00 
iteration 288, loss 0.3138, train acc 0.877, time 0.00 
iteration 289, loss 0.3136, train acc 0.877, time 0.00 
iteration 290, loss 0.3135, train acc 0.877, time 0.00 
iteration 291, loss 0.3134, train acc 0.877, time 0.00 
iteration 292, loss 0.3133, train acc 0.877, time 0.00 
iteration 293, loss 0.3131, train acc 0.877, tim

iteration 577, loss 0.2930, train acc 0.877, time 0.00 
iteration 578, loss 0.2930, train acc 0.877, time 0.00 
iteration 579, loss 0.2929, train acc 0.877, time 0.00 
iteration 580, loss 0.2929, train acc 0.877, time 0.00 
iteration 581, loss 0.2928, train acc 0.877, time 0.00 
iteration 582, loss 0.2928, train acc 0.877, time 0.00 
iteration 583, loss 0.2927, train acc 0.877, time 0.00 
iteration 584, loss 0.2927, train acc 0.877, time 0.00 
iteration 585, loss 0.2927, train acc 0.877, time 0.00 
iteration 586, loss 0.2926, train acc 0.877, time 0.00 
iteration 587, loss 0.2926, train acc 0.877, time 0.00 
iteration 588, loss 0.2925, train acc 0.877, time 0.00 
iteration 589, loss 0.2925, train acc 0.877, time 0.00 
iteration 590, loss 0.2924, train acc 0.877, time 0.00 
iteration 591, loss 0.2924, train acc 0.877, time 0.00 
iteration 592, loss 0.2924, train acc 0.877, time 0.00 
iteration 593, loss 0.2923, train acc 0.877, time 0.00 
iteration 594, loss 0.2923, train acc 0.877, tim

iteration 853, loss 0.2843, train acc 0.877, time 0.00 
iteration 854, loss 0.2843, train acc 0.877, time 0.00 
iteration 855, loss 0.2842, train acc 0.877, time 0.00 
iteration 856, loss 0.2842, train acc 0.877, time 0.00 
iteration 857, loss 0.2842, train acc 0.877, time 0.00 
iteration 858, loss 0.2842, train acc 0.877, time 0.00 
iteration 859, loss 0.2841, train acc 0.877, time 0.00 
iteration 860, loss 0.2841, train acc 0.877, time 0.00 
iteration 861, loss 0.2841, train acc 0.877, time 0.00 
iteration 862, loss 0.2841, train acc 0.877, time 0.00 
iteration 863, loss 0.2841, train acc 0.877, time 0.00 
iteration 864, loss 0.2840, train acc 0.877, time 0.00 
iteration 865, loss 0.2840, train acc 0.877, time 0.00 
iteration 866, loss 0.2840, train acc 0.877, time 0.00 
iteration 867, loss 0.2840, train acc 0.877, time 0.00 
iteration 868, loss 0.2839, train acc 0.877, time 0.00 
iteration 869, loss 0.2839, train acc 0.877, time 0.00 
iteration 870, loss 0.2839, train acc 0.877, tim

iteration 1133, loss 0.2790, train acc 0.882, time 0.00 
iteration 1134, loss 0.2790, train acc 0.882, time 0.00 
iteration 1135, loss 0.2789, train acc 0.882, time 0.00 
iteration 1136, loss 0.2789, train acc 0.882, time 0.00 
iteration 1137, loss 0.2789, train acc 0.882, time 0.00 
iteration 1138, loss 0.2789, train acc 0.882, time 0.00 
iteration 1139, loss 0.2789, train acc 0.882, time 0.00 
iteration 1140, loss 0.2789, train acc 0.882, time 0.00 
iteration 1141, loss 0.2789, train acc 0.882, time 0.00 
iteration 1142, loss 0.2788, train acc 0.882, time 0.00 
iteration 1143, loss 0.2788, train acc 0.882, time 0.00 
iteration 1144, loss 0.2788, train acc 0.882, time 0.00 
iteration 1145, loss 0.2788, train acc 0.882, time 0.00 
iteration 1146, loss 0.2788, train acc 0.882, time 0.00 
iteration 1147, loss 0.2788, train acc 0.882, time 0.00 
iteration 1148, loss 0.2787, train acc 0.882, time 0.00 
iteration 1149, loss 0.2787, train acc 0.882, time 0.00 
iteration 1150, loss 0.2787, tr

iteration 1436, loss 0.2748, train acc 0.890, time 0.00 
iteration 1437, loss 0.2748, train acc 0.890, time 0.00 
iteration 1438, loss 0.2747, train acc 0.890, time 0.00 
iteration 1439, loss 0.2747, train acc 0.890, time 0.00 
iteration 1440, loss 0.2747, train acc 0.890, time 0.00 
iteration 1441, loss 0.2747, train acc 0.890, time 0.00 
iteration 1442, loss 0.2747, train acc 0.890, time 0.00 
iteration 1443, loss 0.2747, train acc 0.890, time 0.00 
iteration 1444, loss 0.2747, train acc 0.890, time 0.00 
iteration 1445, loss 0.2747, train acc 0.890, time 0.00 
iteration 1446, loss 0.2746, train acc 0.890, time 0.00 
iteration 1447, loss 0.2746, train acc 0.890, time 0.00 
iteration 1448, loss 0.2746, train acc 0.890, time 0.00 
iteration 1449, loss 0.2746, train acc 0.890, time 0.00 
iteration 1450, loss 0.2746, train acc 0.890, time 0.00 
iteration 1451, loss 0.2746, train acc 0.890, time 0.00 
iteration 1452, loss 0.2746, train acc 0.890, time 0.00 
iteration 1453, loss 0.2746, tr

iteration 1721, loss 0.2716, train acc 0.890, time 0.00 
iteration 1722, loss 0.2716, train acc 0.890, time 0.00 
iteration 1723, loss 0.2716, train acc 0.890, time 0.00 
iteration 1724, loss 0.2715, train acc 0.890, time 0.00 
iteration 1725, loss 0.2715, train acc 0.890, time 0.00 
iteration 1726, loss 0.2715, train acc 0.890, time 0.00 
iteration 1727, loss 0.2715, train acc 0.890, time 0.00 
iteration 1728, loss 0.2715, train acc 0.890, time 0.00 
iteration 1729, loss 0.2715, train acc 0.890, time 0.00 
iteration 1730, loss 0.2715, train acc 0.890, time 0.00 
iteration 1731, loss 0.2715, train acc 0.890, time 0.00 
iteration 1732, loss 0.2715, train acc 0.890, time 0.00 
iteration 1733, loss 0.2715, train acc 0.890, time 0.00 
iteration 1734, loss 0.2714, train acc 0.890, time 0.00 
iteration 1735, loss 0.2714, train acc 0.890, time 0.00 
iteration 1736, loss 0.2714, train acc 0.890, time 0.00 
iteration 1737, loss 0.2714, train acc 0.890, time 0.00 
iteration 1738, loss 0.2714, tr

iteration 2012, loss 0.2688, train acc 0.885, time 0.00 
iteration 2013, loss 0.2688, train acc 0.885, time 0.00 
iteration 2014, loss 0.2688, train acc 0.885, time 0.00 
iteration 2015, loss 0.2688, train acc 0.885, time 0.00 
iteration 2016, loss 0.2688, train acc 0.885, time 0.00 
iteration 2017, loss 0.2688, train acc 0.885, time 0.00 
iteration 2018, loss 0.2688, train acc 0.885, time 0.00 
iteration 2019, loss 0.2688, train acc 0.885, time 0.00 
iteration 2020, loss 0.2688, train acc 0.885, time 0.00 
iteration 2021, loss 0.2688, train acc 0.885, time 0.00 
iteration 2022, loss 0.2688, train acc 0.885, time 0.00 
iteration 2023, loss 0.2687, train acc 0.885, time 0.00 
iteration 2024, loss 0.2687, train acc 0.885, time 0.00 
iteration 2025, loss 0.2687, train acc 0.885, time 0.00 
iteration 2026, loss 0.2687, train acc 0.885, time 0.00 
iteration 2027, loss 0.2687, train acc 0.885, time 0.00 
iteration 2028, loss 0.2687, train acc 0.885, time 0.00 
iteration 2029, loss 0.2687, tr

iteration 2332, loss 0.2663, train acc 0.885, time 0.00 
iteration 2333, loss 0.2663, train acc 0.885, time 0.00 
iteration 2334, loss 0.2662, train acc 0.885, time 0.00 
iteration 2335, loss 0.2662, train acc 0.885, time 0.00 
iteration 2336, loss 0.2662, train acc 0.885, time 0.00 
iteration 2337, loss 0.2662, train acc 0.885, time 0.00 
iteration 2338, loss 0.2662, train acc 0.885, time 0.00 
iteration 2339, loss 0.2662, train acc 0.885, time 0.00 
iteration 2340, loss 0.2662, train acc 0.885, time 0.00 
iteration 2341, loss 0.2662, train acc 0.885, time 0.00 
iteration 2342, loss 0.2662, train acc 0.885, time 0.00 
iteration 2343, loss 0.2662, train acc 0.885, time 0.00 
iteration 2344, loss 0.2662, train acc 0.885, time 0.00 
iteration 2345, loss 0.2662, train acc 0.885, time 0.00 
iteration 2346, loss 0.2662, train acc 0.885, time 0.00 
iteration 2347, loss 0.2662, train acc 0.885, time 0.00 
iteration 2348, loss 0.2661, train acc 0.885, time 0.00 
iteration 2349, loss 0.2661, tr

iteration 2620, loss 0.2643, train acc 0.887, time 0.00 
iteration 2621, loss 0.2642, train acc 0.887, time 0.00 
iteration 2622, loss 0.2642, train acc 0.887, time 0.00 
iteration 2623, loss 0.2642, train acc 0.887, time 0.00 
iteration 2624, loss 0.2642, train acc 0.887, time 0.00 
iteration 2625, loss 0.2642, train acc 0.887, time 0.00 
iteration 2626, loss 0.2642, train acc 0.887, time 0.00 
iteration 2627, loss 0.2642, train acc 0.887, time 0.00 
iteration 2628, loss 0.2642, train acc 0.887, time 0.00 
iteration 2629, loss 0.2642, train acc 0.887, time 0.00 
iteration 2630, loss 0.2642, train acc 0.887, time 0.00 
iteration 2631, loss 0.2642, train acc 0.887, time 0.00 
iteration 2632, loss 0.2642, train acc 0.887, time 0.00 
iteration 2633, loss 0.2642, train acc 0.887, time 0.00 
iteration 2634, loss 0.2642, train acc 0.887, time 0.00 
iteration 2635, loss 0.2642, train acc 0.887, time 0.00 
iteration 2636, loss 0.2642, train acc 0.887, time 0.00 
iteration 2637, loss 0.2641, tr

iteration 2903, loss 0.2625, train acc 0.890, time 0.00 
iteration 2904, loss 0.2625, train acc 0.890, time 0.00 
iteration 2905, loss 0.2625, train acc 0.890, time 0.00 
iteration 2906, loss 0.2625, train acc 0.890, time 0.00 
iteration 2907, loss 0.2625, train acc 0.890, time 0.00 
iteration 2908, loss 0.2625, train acc 0.890, time 0.00 
iteration 2909, loss 0.2625, train acc 0.890, time 0.00 
iteration 2910, loss 0.2625, train acc 0.890, time 0.00 
iteration 2911, loss 0.2625, train acc 0.890, time 0.00 
iteration 2912, loss 0.2625, train acc 0.890, time 0.00 
iteration 2913, loss 0.2625, train acc 0.890, time 0.00 
iteration 2914, loss 0.2625, train acc 0.890, time 0.00 
iteration 2915, loss 0.2624, train acc 0.890, time 0.00 
iteration 2916, loss 0.2624, train acc 0.890, time 0.00 
iteration 2917, loss 0.2624, train acc 0.890, time 0.00 
iteration 2918, loss 0.2624, train acc 0.890, time 0.00 
iteration 2919, loss 0.2624, train acc 0.890, time 0.00 
iteration 2920, loss 0.2624, tr

iteration 3182, loss 0.2610, train acc 0.892, time 0.00 
iteration 3183, loss 0.2610, train acc 0.892, time 0.00 
iteration 3184, loss 0.2610, train acc 0.892, time 0.00 
iteration 3185, loss 0.2610, train acc 0.892, time 0.00 
iteration 3186, loss 0.2610, train acc 0.892, time 0.00 
iteration 3187, loss 0.2610, train acc 0.892, time 0.00 
iteration 3188, loss 0.2610, train acc 0.892, time 0.00 
iteration 3189, loss 0.2610, train acc 0.892, time 0.00 
iteration 3190, loss 0.2610, train acc 0.892, time 0.00 
iteration 3191, loss 0.2610, train acc 0.892, time 0.00 
iteration 3192, loss 0.2609, train acc 0.892, time 0.00 
iteration 3193, loss 0.2609, train acc 0.892, time 0.00 
iteration 3194, loss 0.2609, train acc 0.892, time 0.00 
iteration 3195, loss 0.2609, train acc 0.892, time 0.00 
iteration 3196, loss 0.2609, train acc 0.892, time 0.00 
iteration 3197, loss 0.2609, train acc 0.892, time 0.00 
iteration 3198, loss 0.2609, train acc 0.895, time 0.00 
iteration 3199, loss 0.2609, tr

iteration 3464, loss 0.2596, train acc 0.897, time 0.00 
iteration 3465, loss 0.2596, train acc 0.897, time 0.00 
iteration 3466, loss 0.2596, train acc 0.897, time 0.00 
iteration 3467, loss 0.2596, train acc 0.897, time 0.00 
iteration 3468, loss 0.2596, train acc 0.897, time 0.00 
iteration 3469, loss 0.2596, train acc 0.897, time 0.00 
iteration 3470, loss 0.2596, train acc 0.897, time 0.00 
iteration 3471, loss 0.2596, train acc 0.897, time 0.00 
iteration 3472, loss 0.2596, train acc 0.897, time 0.00 
iteration 3473, loss 0.2596, train acc 0.897, time 0.00 
iteration 3474, loss 0.2596, train acc 0.897, time 0.00 
iteration 3475, loss 0.2596, train acc 0.897, time 0.00 
iteration 3476, loss 0.2596, train acc 0.897, time 0.00 
iteration 3477, loss 0.2596, train acc 0.897, time 0.00 
iteration 3478, loss 0.2596, train acc 0.897, time 0.00 
iteration 3479, loss 0.2596, train acc 0.897, time 0.00 
iteration 3480, loss 0.2596, train acc 0.897, time 0.00 
iteration 3481, loss 0.2596, tr

iteration 3749, loss 0.2584, train acc 0.900, time 0.00 
iteration 3750, loss 0.2584, train acc 0.900, time 0.00 
iteration 3751, loss 0.2584, train acc 0.900, time 0.00 
iteration 3752, loss 0.2584, train acc 0.900, time 0.00 
iteration 3753, loss 0.2584, train acc 0.900, time 0.00 
iteration 3754, loss 0.2584, train acc 0.900, time 0.00 
iteration 3755, loss 0.2584, train acc 0.900, time 0.00 
iteration 3756, loss 0.2584, train acc 0.900, time 0.00 
iteration 3757, loss 0.2584, train acc 0.900, time 0.00 
iteration 3758, loss 0.2584, train acc 0.900, time 0.00 
iteration 3759, loss 0.2584, train acc 0.900, time 0.00 
iteration 3760, loss 0.2583, train acc 0.900, time 0.00 
iteration 3761, loss 0.2583, train acc 0.900, time 0.00 
iteration 3762, loss 0.2583, train acc 0.900, time 0.00 
iteration 3763, loss 0.2583, train acc 0.900, time 0.00 
iteration 3764, loss 0.2583, train acc 0.900, time 0.00 
iteration 3765, loss 0.2583, train acc 0.900, time 0.00 
iteration 3766, loss 0.2583, tr

iteration 4014, loss 0.2574, train acc 0.902, time 0.00 
iteration 4015, loss 0.2574, train acc 0.902, time 0.00 
iteration 4016, loss 0.2574, train acc 0.902, time 0.00 
iteration 4017, loss 0.2573, train acc 0.902, time 0.00 
iteration 4018, loss 0.2573, train acc 0.902, time 0.00 
iteration 4019, loss 0.2573, train acc 0.902, time 0.00 
iteration 4020, loss 0.2573, train acc 0.902, time 0.00 
iteration 4021, loss 0.2573, train acc 0.902, time 0.00 
iteration 4022, loss 0.2573, train acc 0.902, time 0.00 
iteration 4023, loss 0.2573, train acc 0.902, time 0.00 
iteration 4024, loss 0.2573, train acc 0.902, time 0.00 
iteration 4025, loss 0.2573, train acc 0.902, time 0.00 
iteration 4026, loss 0.2573, train acc 0.902, time 0.00 
iteration 4027, loss 0.2573, train acc 0.902, time 0.00 
iteration 4028, loss 0.2573, train acc 0.902, time 0.00 
iteration 4029, loss 0.2573, train acc 0.902, time 0.00 
iteration 4030, loss 0.2573, train acc 0.902, time 0.00 
iteration 4031, loss 0.2573, tr

iteration 4305, loss 0.2563, train acc 0.905, time 0.00 
iteration 4306, loss 0.2563, train acc 0.905, time 0.00 
iteration 4307, loss 0.2563, train acc 0.905, time 0.00 
iteration 4308, loss 0.2563, train acc 0.905, time 0.00 
iteration 4309, loss 0.2563, train acc 0.905, time 0.00 
iteration 4310, loss 0.2563, train acc 0.905, time 0.00 
iteration 4311, loss 0.2563, train acc 0.905, time 0.00 
iteration 4312, loss 0.2563, train acc 0.905, time 0.00 
iteration 4313, loss 0.2563, train acc 0.905, time 0.00 
iteration 4314, loss 0.2563, train acc 0.905, time 0.00 
iteration 4315, loss 0.2563, train acc 0.905, time 0.00 
iteration 4316, loss 0.2563, train acc 0.905, time 0.00 
iteration 4317, loss 0.2563, train acc 0.905, time 0.00 
iteration 4318, loss 0.2563, train acc 0.905, time 0.00 
iteration 4319, loss 0.2563, train acc 0.905, time 0.00 
iteration 4320, loss 0.2563, train acc 0.905, time 0.00 
iteration 4321, loss 0.2563, train acc 0.905, time 0.00 
iteration 4322, loss 0.2563, tr

iteration 4574, loss 0.2555, train acc 0.908, time 0.00 
iteration 4575, loss 0.2555, train acc 0.908, time 0.00 
iteration 4576, loss 0.2555, train acc 0.908, time 0.00 
iteration 4577, loss 0.2555, train acc 0.908, time 0.00 
iteration 4578, loss 0.2555, train acc 0.908, time 0.00 
iteration 4579, loss 0.2555, train acc 0.908, time 0.00 
iteration 4580, loss 0.2555, train acc 0.908, time 0.00 
iteration 4581, loss 0.2555, train acc 0.908, time 0.00 
iteration 4582, loss 0.2555, train acc 0.908, time 0.00 
iteration 4583, loss 0.2554, train acc 0.908, time 0.00 
iteration 4584, loss 0.2554, train acc 0.908, time 0.00 
iteration 4585, loss 0.2554, train acc 0.908, time 0.00 
iteration 4586, loss 0.2554, train acc 0.908, time 0.00 
iteration 4587, loss 0.2554, train acc 0.908, time 0.00 
iteration 4588, loss 0.2554, train acc 0.908, time 0.00 
iteration 4589, loss 0.2554, train acc 0.908, time 0.00 
iteration 4590, loss 0.2554, train acc 0.908, time 0.00 
iteration 4591, loss 0.2554, tr

iteration 4861, loss 0.2546, train acc 0.908, time 0.00 
iteration 4862, loss 0.2546, train acc 0.908, time 0.00 
iteration 4863, loss 0.2546, train acc 0.908, time 0.00 
iteration 4864, loss 0.2546, train acc 0.908, time 0.00 
iteration 4865, loss 0.2546, train acc 0.908, time 0.00 
iteration 4866, loss 0.2546, train acc 0.908, time 0.00 
iteration 4867, loss 0.2546, train acc 0.908, time 0.00 
iteration 4868, loss 0.2546, train acc 0.908, time 0.00 
iteration 4869, loss 0.2546, train acc 0.908, time 0.00 
iteration 4870, loss 0.2546, train acc 0.908, time 0.00 
iteration 4871, loss 0.2546, train acc 0.908, time 0.00 
iteration 4872, loss 0.2546, train acc 0.908, time 0.00 
iteration 4873, loss 0.2546, train acc 0.908, time 0.00 
iteration 4874, loss 0.2546, train acc 0.908, time 0.00 
iteration 4875, loss 0.2546, train acc 0.908, time 0.00 
iteration 4876, loss 0.2546, train acc 0.908, time 0.00 
iteration 4877, loss 0.2546, train acc 0.908, time 0.00 
iteration 4878, loss 0.2546, tr