In [1]:
import numpy as np
import torch
import torchvision
import matplotlib.pyplot as plt
from time import time
from torchvision import datasets, transforms
from torch import nn, optim
import itertools
from tqdm import tqdm

In [2]:
transform = transforms.Compose([
                              transforms.ToTensor(),
                              transforms.Normalize((0.5,), (0.5,)),
                              ])

In [3]:
size_hidden_layer = [5, 10,50, 100, 500, 1000]
size_train_iter = [10,20, 50, 100, 250, 500, 750, 1000, 1500, 2000]
full_accur_list = []
full_time_list = []

In [4]:
trainset = datasets.MNIST('PATH_TO_STORE_TRAINSET', download=True, train=True, transform=transform)
valset = datasets.MNIST('PATH_TO_STORE_TESTSET', download=True, train=False, transform=transform)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)
valloader = torch.utils.data.DataLoader(valset, batch_size=64, shuffle=True)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [5]:
for N in tqdm(size_hidden_layer):
    
    accur_list = []
    time_list = []
    
    for n_training in tqdm(size_train_iter):
        input_size = 784
        hidden_sizes = [N, N]
        output_size = 10

        model = nn.Sequential(nn.Linear(input_size, hidden_sizes[0]),
                              nn.ReLU(),
                              nn.Linear(hidden_sizes[0], hidden_sizes[1]),
                              nn.ReLU(),
                              nn.Linear(hidden_sizes[1], output_size),
                              nn.LogSoftmax(dim=1))
        model.to(device)
        print(model)
        criterion = nn.NLLLoss() #nn.MSELoss()

        optimizer = optim.Adam(model.parameters(),lr=0.001)
        time0 = time()
        epochs = 15
        for e in range(epochs):
            running_loss = 0
            for images, labels in itertools.islice(trainloader, n_training):
                # Flatten MNIST images into a 784 long vector
                images, labels = images.to(device), labels.to(device)
                images = images.view(images.shape[0], -1)

                # Training pass
                optimizer.zero_grad()

                output = model(images)
                loss = criterion(output, labels)

                #This is where the model learns by backpropagating
                loss.backward()

                #And optimizes its weights here
                optimizer.step()

                running_loss += loss.item()
            else:
                print("Epoch {} - Training loss: {}".format(e, running_loss/len(trainloader)))

        print("\nTraining Time (in minutes) =",(time()-time0)/60)
        time_list += [(time()-time0)/60]
        correct_count, all_count = 0, 0
        for images,labels in valloader:
            images, labels = images.to(device), labels.to(device)
            for i in range(len(labels)):
                img = images[i].view(1, 784)
                with torch.no_grad():
                    logps = model(img)


                ps = torch.exp(logps)
                probab = list(ps.cpu().numpy()[0])
                pred_label = probab.index(max(probab))
                true_label = labels.cpu().numpy()[i]
                if(true_label == pred_label):
                    correct_count += 1
                all_count += 1

        print("Number Of Images Tested =", all_count)
        print("\nModel Accuracy =", (correct_count/all_count))
        accur_list += [correct_count/all_count]

    full_accur_list += [accur_list]
    full_time_list += [time_list]
    np.savetxt('full_accur_list2.txt', np.array(full_accur_list))
    np.savetxt('full_time_list2.txt', np.array(full_time_list))
    np.savetxt('size_hidden_layer2.txt', np.array(size_hidden_layer))
    np.savetxt('size_train_iter2.txt', np.array(size_train_iter))

  0%|                                                                                            | 0/6 [00:00<?, ?it/s]
  0%|                                                                                            | 0/8 [00:00<?, ?it/s][A

Sequential(
  (0): Linear(in_features=784, out_features=5, bias=True)
  (1): ReLU()
  (2): Linear(in_features=5, out_features=5, bias=True)
  (3): ReLU()
  (4): Linear(in_features=5, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.02452894492444199
Epoch 1 - Training loss: 0.024206409576350948
Epoch 2 - Training loss: 0.023988145755043926
Epoch 3 - Training loss: 0.02362472212899214
Epoch 4 - Training loss: 0.023251470980613725
Epoch 5 - Training loss: 0.023154915777096618
Epoch 6 - Training loss: 0.02279244404611811
Epoch 7 - Training loss: 0.02278050227460068
Epoch 8 - Training loss: 0.022015790059876594
Epoch 9 - Training loss: 0.022501572108726258
Epoch 10 - Training loss: 0.022088612066403127
Epoch 11 - Training loss: 0.022120733886385267
Epoch 12 - Training loss: 0.021800514350313622
Epoch 13 - Training loss: 0.021830124006088355
Epoch 14 - Training loss: 0.022220685792121806

Training Time (in minutes) = 0.0997063954671224



 12%|██████████▌                                                                         | 1/8 [00:25<03:01, 25.97s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.2488
Sequential(
  (0): Linear(in_features=784, out_features=5, bias=True)
  (1): ReLU()
  (2): Linear(in_features=5, out_features=5, bias=True)
  (3): ReLU()
  (4): Linear(in_features=5, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.23212484332290031
Epoch 1 - Training loss: 0.20566337156905803
Epoch 2 - Training loss: 0.18826534308350162
Epoch 3 - Training loss: 0.17171002146023423
Epoch 4 - Training loss: 0.1605145007308358
Epoch 5 - Training loss: 0.15573308640705752
Epoch 6 - Training loss: 0.15043291951547552
Epoch 7 - Training loss: 0.14456795870876515
Epoch 8 - Training loss: 0.14090484063000058
Epoch 9 - Training loss: 0.14051249909248434
Epoch 10 - Training loss: 0.1350985844252206
Epoch 11 - Training loss: 0.13575619185911314
Epoch 12 - Training loss: 0.13520916463978
Epoch 13 - Training loss: 0.13324095496236643
Epoch 14 - Training loss: 0.1309181810187887

Training Time (in minutes) = 0.8042


 25%|█████████████████████                                                               | 2/8 [01:28<03:41, 36.93s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.5875
Sequential(
  (0): Linear(in_features=784, out_features=5, bias=True)
  (1): ReLU()
  (2): Linear(in_features=5, out_features=5, bias=True)
  (3): ReLU()
  (4): Linear(in_features=5, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.9009695654230585
Epoch 1 - Training loss: 0.5864715546941452
Epoch 2 - Training loss: 0.5040168432411609
Epoch 3 - Training loss: 0.4804478038602801
Epoch 4 - Training loss: 0.46390841253149484
Epoch 5 - Training loss: 0.44872230516949185
Epoch 6 - Training loss: 0.4358323819792347
Epoch 7 - Training loss: 0.4288355802167962
Epoch 8 - Training loss: 0.42281787068859095
Epoch 9 - Training loss: 0.4141959901303371
Epoch 10 - Training loss: 0.4124045180740641
Epoch 11 - Training loss: 0.40612679224278625
Epoch 12 - Training loss: 0.39862431951169014
Epoch 13 - Training loss: 0.39279314767577245
Epoch 14 - Training loss: 0.3906436224481953

Training Time (in minutes) = 2.7312195


 38%|███████████████████████████████▌                                                    | 3/8 [04:25<06:34, 78.90s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.7789
Sequential(
  (0): Linear(in_features=784, out_features=5, bias=True)
  (1): ReLU()
  (2): Linear(in_features=5, out_features=5, bias=True)
  (3): ReLU()
  (4): Linear(in_features=5, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 1.708795567057026
Epoch 1 - Training loss: 1.2413230570775868
Epoch 2 - Training loss: 1.0933123133711216
Epoch 3 - Training loss: 1.0377409593509968
Epoch 4 - Training loss: 1.0032841590548884
Epoch 5 - Training loss: 0.9834379682789988
Epoch 6 - Training loss: 0.9668879496898732
Epoch 7 - Training loss: 0.9577334401195746
Epoch 8 - Training loss: 0.9511056450257169
Epoch 9 - Training loss: 0.9445246812631326
Epoch 10 - Training loss: 0.9421042034239657
Epoch 11 - Training loss: 0.936389916677719
Epoch 12 - Training loss: 0.9358679190881725
Epoch 13 - Training loss: 0.9332832425896292
Epoch 14 - Training loss: 0.931477466340004

Training Time (in minutes) = 3.3041895389556886


 50%|█████████████████████████████████████████▌                                         | 4/8 [07:50<07:47, 116.85s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.7022
Sequential(
  (0): Linear(in_features=784, out_features=5, bias=True)
  (1): ReLU()
  (2): Linear(in_features=5, out_features=5, bias=True)
  (3): ReLU()
  (4): Linear(in_features=5, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 1.6390454381513697
Epoch 1 - Training loss: 1.0834386472000497
Epoch 2 - Training loss: 0.8937150082354353
Epoch 3 - Training loss: 0.8056260223137036
Epoch 4 - Training loss: 0.7361111252673908
Epoch 5 - Training loss: 0.6957817949783573
Epoch 6 - Training loss: 0.6776307258588165
Epoch 7 - Training loss: 0.6667340752730238
Epoch 8 - Training loss: 0.6573065015743537
Epoch 9 - Training loss: 0.6488503249787064
Epoch 10 - Training loss: 0.6436350144175832
Epoch 11 - Training loss: 0.6380207920824287
Epoch 12 - Training loss: 0.6350344370867906
Epoch 13 - Training loss: 0.6303954496185409
Epoch 14 - Training loss: 0.6279540820988511

Training Time (in minutes) = 2.7943393826484


 62%|███████████████████████████████████████████████████▉                               | 5/8 [10:45<06:42, 134.29s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.8128
Sequential(
  (0): Linear(in_features=784, out_features=5, bias=True)
  (1): ReLU()
  (2): Linear(in_features=5, out_features=5, bias=True)
  (3): ReLU()
  (4): Linear(in_features=5, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 2.005977064688831
Epoch 1 - Training loss: 1.8479696658374405
Epoch 2 - Training loss: 1.7639186811853829
Epoch 3 - Training loss: 1.7082873371872567
Epoch 4 - Training loss: 1.664899352516955
Epoch 5 - Training loss: 1.6311142068427762
Epoch 6 - Training loss: 1.5977530934409039
Epoch 7 - Training loss: 1.5673478662586415
Epoch 8 - Training loss: 1.5443068053930806
Epoch 9 - Training loss: 1.5254283042858914
Epoch 10 - Training loss: 1.512009448461187
Epoch 11 - Training loss: 1.5025300020093857
Epoch 12 - Training loss: 1.4954836613842164
Epoch 13 - Training loss: 1.49189605209619
Epoch 14 - Training loss: 1.486837647871168

Training Time (in minutes) = 2.7994651595751443



 75%|██████████████████████████████████████████████████████████████▎                    | 6/8 [13:40<04:53, 146.58s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.3876
Sequential(
  (0): Linear(in_features=784, out_features=5, bias=True)
  (1): ReLU()
  (2): Linear(in_features=5, out_features=5, bias=True)
  (3): ReLU()
  (4): Linear(in_features=5, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 1.6476473307558723
Epoch 1 - Training loss: 1.2504175844223007
Epoch 2 - Training loss: 1.136883147362707
Epoch 3 - Training loss: 1.0646125200206538
Epoch 4 - Training loss: 1.019688290954907
Epoch 5 - Training loss: 0.9915995203228648
Epoch 6 - Training loss: 0.9706351405013599
Epoch 7 - Training loss: 0.9586903456685893
Epoch 8 - Training loss: 0.9437027020749252
Epoch 9 - Training loss: 0.9365078975269789
Epoch 10 - Training loss: 0.928313069887507
Epoch 11 - Training loss: 0.9234876187879648
Epoch 12 - Training loss: 0.9167834974046963
Epoch 13 - Training loss: 0.9130754218553938
Epoch 14 - Training loss: 0.9081407566187478

Training Time (in minutes) = 2.868638507525126



 88%|████████████████████████████████████████████████████████████████████████▋          | 7/8 [16:40<02:36, 156.43s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.7071
Sequential(
  (0): Linear(in_features=784, out_features=5, bias=True)
  (1): ReLU()
  (2): Linear(in_features=5, out_features=5, bias=True)
  (3): ReLU()
  (4): Linear(in_features=5, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 1.5846816596191828
Epoch 1 - Training loss: 1.273632876717968
Epoch 2 - Training loss: 1.0836140446063043
Epoch 3 - Training loss: 0.9425021607611479
Epoch 4 - Training loss: 0.8524218920959847
Epoch 5 - Training loss: 0.8018838707953374
Epoch 6 - Training loss: 0.7798441576360385
Epoch 7 - Training loss: 0.7644454080031625
Epoch 8 - Training loss: 0.7560397228007631
Epoch 9 - Training loss: 0.7471457664836952
Epoch 10 - Training loss: 0.7411152479300367
Epoch 11 - Training loss: 0.7389682585369549
Epoch 12 - Training loss: 0.7333030629831591
Epoch 13 - Training loss: 0.731913141032526
Epoch 14 - Training loss: 0.7284344577713053

Training Time (in minutes) = 2.79892870982488



100%|███████████████████████████████████████████████████████████████████████████████████| 8/8 [19:35<00:00, 146.93s/it][A
 17%|█████████████▎                                                                  | 1/6 [19:35<1:37:57, 1175.44s/it]
  0%|                                                                                            | 0/8 [00:00<?, ?it/s][A

Number Of Images Tested = 10000

Model Accuracy = 0.7625
Sequential(
  (0): Linear(in_features=784, out_features=10, bias=True)
  (1): ReLU()
  (2): Linear(in_features=10, out_features=10, bias=True)
  (3): ReLU()
  (4): Linear(in_features=10, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.024424715845315442
Epoch 1 - Training loss: 0.02384687435906579
Epoch 2 - Training loss: 0.023296641642605063
Epoch 3 - Training loss: 0.022740967746482474
Epoch 4 - Training loss: 0.021633780460113654
Epoch 5 - Training loss: 0.021313926050149555
Epoch 6 - Training loss: 0.020310896164827
Epoch 7 - Training loss: 0.01996137160482183
Epoch 8 - Training loss: 0.018662811850688098
Epoch 9 - Training loss: 0.01850364444606594
Epoch 10 - Training loss: 0.017452717589925348
Epoch 11 - Training loss: 0.017228683810244238
Epoch 12 - Training loss: 0.01642449387609323
Epoch 13 - Training loss: 0.0158981192849084
Epoch 14 - Training loss: 0.015372268935002244

Training Time (in m


 12%|██████████▌                                                                         | 1/8 [00:09<01:03,  9.08s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.5277
Sequential(
  (0): Linear(in_features=784, out_features=10, bias=True)
  (1): ReLU()
  (2): Linear(in_features=10, out_features=10, bias=True)
  (3): ReLU()
  (4): Linear(in_features=10, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.2011251447043185
Epoch 1 - Training loss: 0.1295148587938565
Epoch 2 - Training loss: 0.1004449455087373
Epoch 3 - Training loss: 0.08328303033863303
Epoch 4 - Training loss: 0.07047343295393214
Epoch 5 - Training loss: 0.06424404909488743
Epoch 6 - Training loss: 0.05967591995242308
Epoch 7 - Training loss: 0.05697243071314114
Epoch 8 - Training loss: 0.05418446208876587
Epoch 9 - Training loss: 0.05096606700532218
Epoch 10 - Training loss: 0.05075738682294451
Epoch 11 - Training loss: 0.04698616507719321
Epoch 12 - Training loss: 0.04429137849731486
Epoch 13 - Training loss: 0.04419453229222979
Epoch 14 - Training loss: 0.04599448144118161

Training Time (in minutes) =


 25%|█████████████████████                                                               | 2/8 [00:34<01:23, 13.94s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.8783
Sequential(
  (0): Linear(in_features=784, out_features=10, bias=True)
  (1): ReLU()
  (2): Linear(in_features=10, out_features=10, bias=True)
  (3): ReLU()
  (4): Linear(in_features=10, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.7838950241679576
Epoch 1 - Training loss: 0.5782347530571382
Epoch 2 - Training loss: 0.5325613452046156
Epoch 3 - Training loss: 0.4969885851274421
Epoch 4 - Training loss: 0.4762635807365751
Epoch 5 - Training loss: 0.4571637655181417
Epoch 6 - Training loss: 0.4391182689651497
Epoch 7 - Training loss: 0.4170305361943458
Epoch 8 - Training loss: 0.3932625625624077
Epoch 9 - Training loss: 0.3827919953349811
Epoch 10 - Training loss: 0.3705050761320952
Epoch 11 - Training loss: 0.3590045810889588
Epoch 12 - Training loss: 0.34835682250162175
Epoch 13 - Training loss: 0.34410941013013885
Epoch 14 - Training loss: 0.3451132411514518

Training Time (in minutes) = 1.4961026


 38%|███████████████████████████████▌                                                    | 3/8 [02:11<03:14, 38.84s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.8023
Sequential(
  (0): Linear(in_features=784, out_features=10, bias=True)
  (1): ReLU()
  (2): Linear(in_features=10, out_features=10, bias=True)
  (3): ReLU()
  (4): Linear(in_features=10, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.840558231575911
Epoch 1 - Training loss: 0.4483708816963726
Epoch 2 - Training loss: 0.38596909850645167
Epoch 3 - Training loss: 0.35856394288636473
Epoch 4 - Training loss: 0.34514618815103576
Epoch 5 - Training loss: 0.3341777032610577
Epoch 6 - Training loss: 0.327667009434911
Epoch 7 - Training loss: 0.3137615101376195
Epoch 8 - Training loss: 0.30349822960365047
Epoch 9 - Training loss: 0.29784137458562343
Epoch 10 - Training loss: 0.2931638325868385
Epoch 11 - Training loss: 0.2894168969696519
Epoch 12 - Training loss: 0.28522520903140497
Epoch 13 - Training loss: 0.28128119568421894
Epoch 14 - Training loss: 0.27679958207203126

Training Time (in minutes) = 2.800


 50%|██████████████████████████████████████████                                          | 4/8 [05:06<05:18, 79.74s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.9205
Sequential(
  (0): Linear(in_features=784, out_features=10, bias=True)
  (1): ReLU()
  (2): Linear(in_features=10, out_features=10, bias=True)
  (3): ReLU()
  (4): Linear(in_features=10, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 1.0369304402677744
Epoch 1 - Training loss: 0.6652138753931152
Epoch 2 - Training loss: 0.5802495997152857
Epoch 3 - Training loss: 0.5252443031906319
Epoch 4 - Training loss: 0.4913270829487711
Epoch 5 - Training loss: 0.47057541667906716
Epoch 6 - Training loss: 0.4594634085226415
Epoch 7 - Training loss: 0.4482631597882395
Epoch 8 - Training loss: 0.43893996419619397
Epoch 9 - Training loss: 0.43295499172482665
Epoch 10 - Training loss: 0.42829191085816953
Epoch 11 - Training loss: 0.4250083838952884
Epoch 12 - Training loss: 0.4211907010120369
Epoch 13 - Training loss: 0.41739886692528533
Epoch 14 - Training loss: 0.4161148183762646

Training Time (in minutes) = 2.8041


 62%|███████████████████████████████████████████████████▉                               | 5/8 [08:01<05:25, 108.46s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.8737
Sequential(
  (0): Linear(in_features=784, out_features=10, bias=True)
  (1): ReLU()
  (2): Linear(in_features=10, out_features=10, bias=True)
  (3): ReLU()
  (4): Linear(in_features=10, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.7998376075329303
Epoch 1 - Training loss: 0.4383107930548918
Epoch 2 - Training loss: 0.37841963151624713
Epoch 3 - Training loss: 0.348153179928438
Epoch 4 - Training loss: 0.32978305807595315
Epoch 5 - Training loss: 0.31842422702991124
Epoch 6 - Training loss: 0.30671171640663514
Epoch 7 - Training loss: 0.2998024723581922
Epoch 8 - Training loss: 0.29332511563068514
Epoch 9 - Training loss: 0.28669432082028784
Epoch 10 - Training loss: 0.28117031283946686
Epoch 11 - Training loss: 0.2768133634617969
Epoch 12 - Training loss: 0.2715993851010225
Epoch 13 - Training loss: 0.2659341626163167
Epoch 14 - Training loss: 0.26330577198074445

Training Time (in minutes) = 2.79


 75%|██████████████████████████████████████████████████████████████▎                    | 6/8 [10:56<04:16, 128.40s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.9173
Sequential(
  (0): Linear(in_features=784, out_features=10, bias=True)
  (1): ReLU()
  (2): Linear(in_features=10, out_features=10, bias=True)
  (3): ReLU()
  (4): Linear(in_features=10, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.7161613767271611
Epoch 1 - Training loss: 0.3741158674250661
Epoch 2 - Training loss: 0.3349771401616556
Epoch 3 - Training loss: 0.3111296199214484
Epoch 4 - Training loss: 0.2966357154219644
Epoch 5 - Training loss: 0.2862748114833001
Epoch 6 - Training loss: 0.2775276144430327
Epoch 7 - Training loss: 0.26986132847315975
Epoch 8 - Training loss: 0.2628122286350806
Epoch 9 - Training loss: 0.2588734919272824
Epoch 10 - Training loss: 0.2516384375875375
Epoch 11 - Training loss: 0.245904948141402
Epoch 12 - Training loss: 0.2410083088451929
Epoch 13 - Training loss: 0.2394898421546099
Epoch 14 - Training loss: 0.2363204243642562

Training Time (in minutes) = 2.804997694


 88%|████████████████████████████████████████████████████████████████████████▋          | 7/8 [13:52<02:22, 142.50s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.9301
Sequential(
  (0): Linear(in_features=784, out_features=10, bias=True)
  (1): ReLU()
  (2): Linear(in_features=10, out_features=10, bias=True)
  (3): ReLU()
  (4): Linear(in_features=10, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 1.14069930351238
Epoch 1 - Training loss: 0.6700533727593005
Epoch 2 - Training loss: 0.5809500956141365
Epoch 3 - Training loss: 0.5284551501846009
Epoch 4 - Training loss: 0.49489204544248355
Epoch 5 - Training loss: 0.4755089084572121
Epoch 6 - Training loss: 0.4583359291749214
Epoch 7 - Training loss: 0.44647359110907453
Epoch 8 - Training loss: 0.4387504196783373
Epoch 9 - Training loss: 0.43457871370478224
Epoch 10 - Training loss: 0.4298667020635056
Epoch 11 - Training loss: 0.42330207556549676
Epoch 12 - Training loss: 0.42054474588904556
Epoch 13 - Training loss: 0.41714386721409713
Epoch 14 - Training loss: 0.4158352212801671

Training Time (in minutes) = 2.79913


100%|███████████████████████████████████████████████████████████████████████████████████| 8/8 [16:47<00:00, 125.91s/it][A
 33%|██████████████████████████▋                                                     | 2/6 [36:22<1:15:00, 1125.00s/it]
  0%|                                                                                            | 0/8 [00:00<?, ?it/s][A

Number Of Images Tested = 10000

Model Accuracy = 0.8709
Sequential(
  (0): Linear(in_features=784, out_features=50, bias=True)
  (1): ReLU()
  (2): Linear(in_features=50, out_features=50, bias=True)
  (3): ReLU()
  (4): Linear(in_features=50, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.023840835099535457
Epoch 1 - Training loss: 0.02181568099999987
Epoch 2 - Training loss: 0.01884927576793028
Epoch 3 - Training loss: 0.015589156765927639
Epoch 4 - Training loss: 0.012845986687552446
Epoch 5 - Training loss: 0.011001095143970904
Epoch 6 - Training loss: 0.008828863088510185
Epoch 7 - Training loss: 0.007655276926849951
Epoch 8 - Training loss: 0.007142009638519938
Epoch 9 - Training loss: 0.006792441487058139
Epoch 10 - Training loss: 0.005866302133623217
Epoch 11 - Training loss: 0.006232682162764738
Epoch 12 - Training loss: 0.005474614023145582
Epoch 13 - Training loss: 0.005030588872397124
Epoch 14 - Training loss: 0.004926327544488887

Training Tim


 12%|██████████▌                                                                         | 1/8 [00:09<01:03,  9.01s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.8602
Sequential(
  (0): Linear(in_features=784, out_features=50, bias=True)
  (1): ReLU()
  (2): Linear(in_features=50, out_features=50, bias=True)
  (3): ReLU()
  (4): Linear(in_features=50, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.13058031939748507
Epoch 1 - Training loss: 0.05233086547109364
Epoch 2 - Training loss: 0.043965992785847265
Epoch 3 - Training loss: 0.040775765996497834
Epoch 4 - Training loss: 0.038949391480956254
Epoch 5 - Training loss: 0.03656760836715129
Epoch 6 - Training loss: 0.03241485578101327
Epoch 7 - Training loss: 0.032426945746008524
Epoch 8 - Training loss: 0.03126781228890043
Epoch 9 - Training loss: 0.029931193166005333
Epoch 10 - Training loss: 0.031451274535612764
Epoch 11 - Training loss: 0.029551858968063713
Epoch 12 - Training loss: 0.027701825427729438
Epoch 13 - Training loss: 0.027561329511691258
Epoch 14 - Training loss: 0.025679602265866325

Training Time (


 25%|█████████████████████                                                               | 2/8 [00:34<01:23, 13.99s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.9293
Sequential(
  (0): Linear(in_features=784, out_features=50, bias=True)
  (1): ReLU()
  (2): Linear(in_features=50, out_features=50, bias=True)
  (3): ReLU()
  (4): Linear(in_features=50, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.3189069932140013
Epoch 1 - Training loss: 0.16611743352211106
Epoch 2 - Training loss: 0.13824202901105892
Epoch 3 - Training loss: 0.11711484981355255
Epoch 4 - Training loss: 0.10676802471994973
Epoch 5 - Training loss: 0.09373163645948047
Epoch 6 - Training loss: 0.0858424241219693
Epoch 7 - Training loss: 0.08270044176023143
Epoch 8 - Training loss: 0.07680765001663267
Epoch 9 - Training loss: 0.07070918492019684
Epoch 10 - Training loss: 0.0713561980057754
Epoch 11 - Training loss: 0.06643587047817992
Epoch 12 - Training loss: 0.06427976498201585
Epoch 13 - Training loss: 0.061133650379743915
Epoch 14 - Training loss: 0.05588159932574229

Training Time (in minutes) 


 38%|███████████████████████████████▌                                                    | 3/8 [02:11<03:14, 38.93s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.9629
Sequential(
  (0): Linear(in_features=784, out_features=50, bias=True)
  (1): ReLU()
  (2): Linear(in_features=50, out_features=50, bias=True)
  (3): ReLU()
  (4): Linear(in_features=50, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.436463371459355
Epoch 1 - Training loss: 0.23146753389237404
Epoch 2 - Training loss: 0.17378926141334494
Epoch 3 - Training loss: 0.14226760612383707
Epoch 4 - Training loss: 0.12226136034326766
Epoch 5 - Training loss: 0.11030585012599223
Epoch 6 - Training loss: 0.09982621330998219
Epoch 7 - Training loss: 0.09238191687865362
Epoch 8 - Training loss: 0.08435675632685169
Epoch 9 - Training loss: 0.08099830040195857
Epoch 10 - Training loss: 0.07473170107552238
Epoch 11 - Training loss: 0.06906971633672587
Epoch 12 - Training loss: 0.06555774199095235
Epoch 13 - Training loss: 0.06296290171100323
Epoch 14 - Training loss: 0.059787581909074586

Training Time (in minutes)


 50%|██████████████████████████████████████████                                          | 4/8 [05:07<05:19, 79.87s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.9723
Sequential(
  (0): Linear(in_features=784, out_features=50, bias=True)
  (1): ReLU()
  (2): Linear(in_features=50, out_features=50, bias=True)
  (3): ReLU()
  (4): Linear(in_features=50, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.45652790646404345
Epoch 1 - Training loss: 0.2461569659562825
Epoch 2 - Training loss: 0.1871578406764945
Epoch 3 - Training loss: 0.15125624256840012
Epoch 4 - Training loss: 0.13200341275871308
Epoch 5 - Training loss: 0.11902204299095406
Epoch 6 - Training loss: 0.1084919653292785
Epoch 7 - Training loss: 0.099276743656489
Epoch 8 - Training loss: 0.09537880696944082
Epoch 9 - Training loss: 0.08778124464862445
Epoch 10 - Training loss: 0.08278782643091831
Epoch 11 - Training loss: 0.07835074656132633
Epoch 12 - Training loss: 0.07326920871743992
Epoch 13 - Training loss: 0.06965729411044069
Epoch 14 - Training loss: 0.06810140791438472

Training Time (in minutes) = 2


 62%|███████████████████████████████████████████████████▉                               | 5/8 [08:02<05:25, 108.45s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.9679
Sequential(
  (0): Linear(in_features=784, out_features=50, bias=True)
  (1): ReLU()
  (2): Linear(in_features=50, out_features=50, bias=True)
  (3): ReLU()
  (4): Linear(in_features=50, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.4709249368862811
Epoch 1 - Training loss: 0.2539894417770255
Epoch 2 - Training loss: 0.19823154750297953
Epoch 3 - Training loss: 0.16648643325640958
Epoch 4 - Training loss: 0.1428301306302423
Epoch 5 - Training loss: 0.12975515280641728
Epoch 6 - Training loss: 0.11669097404315401
Epoch 7 - Training loss: 0.10930676106363535
Epoch 8 - Training loss: 0.09918900747606749
Epoch 9 - Training loss: 0.0980099256037236
Epoch 10 - Training loss: 0.09017515457324636
Epoch 11 - Training loss: 0.08544351941526634
Epoch 12 - Training loss: 0.08095828665376727
Epoch 13 - Training loss: 0.07781593795857832
Epoch 14 - Training loss: 0.07477805199923673

Training Time (in minutes) = 


 75%|██████████████████████████████████████████████████████████████▎                    | 6/8 [10:57<04:17, 128.59s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.9649
Sequential(
  (0): Linear(in_features=784, out_features=50, bias=True)
  (1): ReLU()
  (2): Linear(in_features=50, out_features=50, bias=True)
  (3): ReLU()
  (4): Linear(in_features=50, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.46369057904079014
Epoch 1 - Training loss: 0.24286117606055635
Epoch 2 - Training loss: 0.1893116119446785
Epoch 3 - Training loss: 0.15216542621141177
Epoch 4 - Training loss: 0.1296709778327455
Epoch 5 - Training loss: 0.11659323021983986
Epoch 6 - Training loss: 0.10794867302126276
Epoch 7 - Training loss: 0.09730776200996342
Epoch 8 - Training loss: 0.08869485130140395
Epoch 9 - Training loss: 0.08300156896906112
Epoch 10 - Training loss: 0.0804521305703008
Epoch 11 - Training loss: 0.07445865307154177
Epoch 12 - Training loss: 0.07169240165148368
Epoch 13 - Training loss: 0.06653821332427834
Epoch 14 - Training loss: 0.06334571254803047

Training Time (in minutes) =


 88%|████████████████████████████████████████████████████████████████████████▋          | 7/8 [13:53<02:22, 142.62s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.9677
Sequential(
  (0): Linear(in_features=784, out_features=50, bias=True)
  (1): ReLU()
  (2): Linear(in_features=50, out_features=50, bias=True)
  (3): ReLU()
  (4): Linear(in_features=50, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.46899080386897646
Epoch 1 - Training loss: 0.2648481656350434
Epoch 2 - Training loss: 0.20622731107813336
Epoch 3 - Training loss: 0.17317225780488968
Epoch 4 - Training loss: 0.15080857198280312
Epoch 5 - Training loss: 0.13505360592148705
Epoch 6 - Training loss: 0.12281619746690747
Epoch 7 - Training loss: 0.11141114722667282
Epoch 8 - Training loss: 0.10538077144734641
Epoch 9 - Training loss: 0.09682495718293671
Epoch 10 - Training loss: 0.09258978390323519
Epoch 11 - Training loss: 0.08783873327688049
Epoch 12 - Training loss: 0.08301850000042905
Epoch 13 - Training loss: 0.07755797223420317
Epoch 14 - Training loss: 0.07512784815991103

Training Time (in minutes)


100%|███████████████████████████████████████████████████████████████████████████████████| 8/8 [16:48<00:00, 126.08s/it][A
 50%|█████████████████████████████████████████                                         | 3/6 [53:11<54:30, 1090.09s/it]
  0%|                                                                                            | 0/8 [00:00<?, ?it/s][A

Number Of Images Tested = 10000

Model Accuracy = 0.9696
Sequential(
  (0): Linear(in_features=784, out_features=100, bias=True)
  (1): ReLU()
  (2): Linear(in_features=100, out_features=100, bias=True)
  (3): ReLU()
  (4): Linear(in_features=100, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.023474259671371882
Epoch 1 - Training loss: 0.019951283550465795
Epoch 2 - Training loss: 0.016221454402785312
Epoch 3 - Training loss: 0.011729056837716337
Epoch 4 - Training loss: 0.009040488553708042
Epoch 5 - Training loss: 0.007519301320952393
Epoch 6 - Training loss: 0.006333850276495602
Epoch 7 - Training loss: 0.006793262957255723
Epoch 8 - Training loss: 0.005818137990386247
Epoch 9 - Training loss: 0.005976253862319978
Epoch 10 - Training loss: 0.005192428922602362
Epoch 11 - Training loss: 0.005055214963487979
Epoch 12 - Training loss: 0.005278963047558311
Epoch 13 - Training loss: 0.0045186252291522806
Epoch 14 - Training loss: 0.00447830710329735

Traini


 12%|██████████▌                                                                         | 1/8 [00:09<01:04,  9.22s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.8806
Sequential(
  (0): Linear(in_features=784, out_features=100, bias=True)
  (1): ReLU()
  (2): Linear(in_features=100, out_features=100, bias=True)
  (3): ReLU()
  (4): Linear(in_features=100, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.11322598616832863
Epoch 1 - Training loss: 0.04554210388774811
Epoch 2 - Training loss: 0.03966961160047985
Epoch 3 - Training loss: 0.03602645061671861
Epoch 4 - Training loss: 0.031158459009423947
Epoch 5 - Training loss: 0.029433085386559907
Epoch 6 - Training loss: 0.02670378968723293
Epoch 7 - Training loss: 0.02630849645288387
Epoch 8 - Training loss: 0.027391447481125403
Epoch 9 - Training loss: 0.022274356688867247
Epoch 10 - Training loss: 0.0212868988108851
Epoch 11 - Training loss: 0.020489873694204318
Epoch 12 - Training loss: 0.02019038560913443
Epoch 13 - Training loss: 0.02142716242449243
Epoch 14 - Training loss: 0.017050812060613114

Training Time (i


 25%|█████████████████████                                                               | 2/8 [00:34<01:24, 14.05s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.9513
Sequential(
  (0): Linear(in_features=784, out_features=100, bias=True)
  (1): ReLU()
  (2): Linear(in_features=100, out_features=100, bias=True)
  (3): ReLU()
  (4): Linear(in_features=100, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.2687765205465654
Epoch 1 - Training loss: 0.1421767410351587
Epoch 2 - Training loss: 0.10699927701211687
Epoch 3 - Training loss: 0.08458641979660687
Epoch 4 - Training loss: 0.07474062436107379
Epoch 5 - Training loss: 0.06642941667485847
Epoch 6 - Training loss: 0.0595397678519617
Epoch 7 - Training loss: 0.054356669544427
Epoch 8 - Training loss: 0.05249670991646265
Epoch 9 - Training loss: 0.04703288554533649
Epoch 10 - Training loss: 0.04595894745783384
Epoch 11 - Training loss: 0.04119685513457891
Epoch 12 - Training loss: 0.03883230415131174
Epoch 13 - Training loss: 0.0359906500427803
Epoch 14 - Training loss: 0.03534480072716787

Training Time (in minutes) 


 38%|███████████████████████████████▌                                                    | 3/8 [02:11<03:14, 38.91s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.9712
Sequential(
  (0): Linear(in_features=784, out_features=100, bias=True)
  (1): ReLU()
  (2): Linear(in_features=100, out_features=100, bias=True)
  (3): ReLU()
  (4): Linear(in_features=100, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.39450125046757495
Epoch 1 - Training loss: 0.186459532400756
Epoch 2 - Training loss: 0.1378882798328519
Epoch 3 - Training loss: 0.10975031465339635
Epoch 4 - Training loss: 0.09281784674919236
Epoch 5 - Training loss: 0.08584623944673583
Epoch 6 - Training loss: 0.07228643321660536
Epoch 7 - Training loss: 0.06806336759702801
Epoch 8 - Training loss: 0.057597447373370116
Epoch 9 - Training loss: 0.05630916435676597
Epoch 10 - Training loss: 0.04939160671339297
Epoch 11 - Training loss: 0.04652901231519767
Epoch 12 - Training loss: 0.044860267471561806
Epoch 13 - Training loss: 0.04003595028505492
Epoch 14 - Training loss: 0.036795150839102106

Training Time (in min


 50%|██████████████████████████████████████████                                          | 4/8 [05:06<05:19, 79.90s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.9758
Sequential(
  (0): Linear(in_features=784, out_features=100, bias=True)
  (1): ReLU()
  (2): Linear(in_features=100, out_features=100, bias=True)
  (3): ReLU()
  (4): Linear(in_features=100, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.38739609975677564
Epoch 1 - Training loss: 0.18505776988894446
Epoch 2 - Training loss: 0.13334360714557011
Epoch 3 - Training loss: 0.10787820695305685
Epoch 4 - Training loss: 0.09114612055533348
Epoch 5 - Training loss: 0.07985667074158756
Epoch 6 - Training loss: 0.07341530981427952
Epoch 7 - Training loss: 0.06332330497652927
Epoch 8 - Training loss: 0.05767037625958734
Epoch 9 - Training loss: 0.05423267768906442
Epoch 10 - Training loss: 0.04728786160212272
Epoch 11 - Training loss: 0.04436239877890454
Epoch 12 - Training loss: 0.04167748405647748
Epoch 13 - Training loss: 0.03845823652494246
Epoch 14 - Training loss: 0.03573190268855899

Training Time (in min


 62%|███████████████████████████████████████████████████▉                               | 5/8 [08:02<05:26, 108.70s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.9742
Sequential(
  (0): Linear(in_features=784, out_features=100, bias=True)
  (1): ReLU()
  (2): Linear(in_features=100, out_features=100, bias=True)
  (3): ReLU()
  (4): Linear(in_features=100, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.3791001322808296
Epoch 1 - Training loss: 0.17527199398750054
Epoch 2 - Training loss: 0.12919868296905876
Epoch 3 - Training loss: 0.10689547879776276
Epoch 4 - Training loss: 0.08972656180951069
Epoch 5 - Training loss: 0.08191426342595488
Epoch 6 - Training loss: 0.07304511520602523
Epoch 7 - Training loss: 0.06592729979994963
Epoch 8 - Training loss: 0.059016140463795747
Epoch 9 - Training loss: 0.0555027770335232
Epoch 10 - Training loss: 0.04970498628684008
Epoch 11 - Training loss: 0.04516804282779871
Epoch 12 - Training loss: 0.043487977935001254
Epoch 13 - Training loss: 0.04204337126009468
Epoch 14 - Training loss: 0.03613691471183478

Training Time (in min


 75%|██████████████████████████████████████████████████████████████▎                    | 6/8 [10:58<04:17, 128.84s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.9728
Sequential(
  (0): Linear(in_features=784, out_features=100, bias=True)
  (1): ReLU()
  (2): Linear(in_features=100, out_features=100, bias=True)
  (3): ReLU()
  (4): Linear(in_features=100, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.38240675810875413
Epoch 1 - Training loss: 0.1832576737300292
Epoch 2 - Training loss: 0.13210857545992713
Epoch 3 - Training loss: 0.10741418948583702
Epoch 4 - Training loss: 0.09191239315635169
Epoch 5 - Training loss: 0.08027092638864383
Epoch 6 - Training loss: 0.07141080559161839
Epoch 7 - Training loss: 0.061992703621257854
Epoch 8 - Training loss: 0.06013967372131532
Epoch 9 - Training loss: 0.051271469086043235
Epoch 10 - Training loss: 0.049188781860294434
Epoch 11 - Training loss: 0.04498250132053296
Epoch 12 - Training loss: 0.04291038764273323
Epoch 13 - Training loss: 0.038824665904251626
Epoch 14 - Training loss: 0.033519173628771735

Training Time (in


 88%|████████████████████████████████████████████████████████████████████████▋          | 7/8 [13:54<02:22, 142.91s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.9742
Sequential(
  (0): Linear(in_features=784, out_features=100, bias=True)
  (1): ReLU()
  (2): Linear(in_features=100, out_features=100, bias=True)
  (3): ReLU()
  (4): Linear(in_features=100, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.3877762776138241
Epoch 1 - Training loss: 0.18396069478394508
Epoch 2 - Training loss: 0.13855737402463264
Epoch 3 - Training loss: 0.11264592714544171
Epoch 4 - Training loss: 0.0955319936210508
Epoch 5 - Training loss: 0.08587399263705399
Epoch 6 - Training loss: 0.07671542054832553
Epoch 7 - Training loss: 0.06953230952577931
Epoch 8 - Training loss: 0.0627530833511655
Epoch 9 - Training loss: 0.05679832891360529
Epoch 10 - Training loss: 0.05316582630410282
Epoch 11 - Training loss: 0.0494094616385983
Epoch 12 - Training loss: 0.044618631383455766
Epoch 13 - Training loss: 0.043871801006975075
Epoch 14 - Training loss: 0.038364501068316924

Training Time (in minu


100%|███████████████████████████████████████████████████████████████████████████████████| 8/8 [16:50<00:00, 126.27s/it][A
 67%|█████████████████████████████████████████████████████▎                          | 4/6 [1:10:01<35:32, 1066.11s/it]
  0%|                                                                                            | 0/8 [00:00<?, ?it/s][A

Number Of Images Tested = 10000

Model Accuracy = 0.9739
Sequential(
  (0): Linear(in_features=784, out_features=500, bias=True)
  (1): ReLU()
  (2): Linear(in_features=500, out_features=500, bias=True)
  (3): ReLU()
  (4): Linear(in_features=500, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.020800836686132303
Epoch 1 - Training loss: 0.012317911012848811
Epoch 2 - Training loss: 0.008809971911058243
Epoch 3 - Training loss: 0.007042198928434457
Epoch 4 - Training loss: 0.005994691650496363
Epoch 5 - Training loss: 0.0057605855754697755
Epoch 6 - Training loss: 0.005085565388075579
Epoch 7 - Training loss: 0.005556736165272401
Epoch 8 - Training loss: 0.004390500406466567
Epoch 9 - Training loss: 0.004397428429711348
Epoch 10 - Training loss: 0.004631845649879878
Epoch 11 - Training loss: 0.004110925185527883
Epoch 12 - Training loss: 0.0044021101902797025
Epoch 13 - Training loss: 0.003928447519538245
Epoch 14 - Training loss: 0.0034139290579092273

Tra


 12%|██████████▌                                                                         | 1/8 [00:09<01:04,  9.16s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.8898
Sequential(
  (0): Linear(in_features=784, out_features=500, bias=True)
  (1): ReLU()
  (2): Linear(in_features=500, out_features=500, bias=True)
  (3): ReLU()
  (4): Linear(in_features=500, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.07675159301584972
Epoch 1 - Training loss: 0.03967666911926351
Epoch 2 - Training loss: 0.033171345747864324
Epoch 3 - Training loss: 0.024979231938687976
Epoch 4 - Training loss: 0.0247494971026172
Epoch 5 - Training loss: 0.02100760857465425
Epoch 6 - Training loss: 0.019196120319145322
Epoch 7 - Training loss: 0.02075418343406115
Epoch 8 - Training loss: 0.01743772559598692
Epoch 9 - Training loss: 0.017510501111843692
Epoch 10 - Training loss: 0.016600891455277197
Epoch 11 - Training loss: 0.014544415265830087
Epoch 12 - Training loss: 0.01488250320050508
Epoch 13 - Training loss: 0.013401562709416916
Epoch 14 - Training loss: 0.014263411169685027

Training Time 


 25%|█████████████████████                                                               | 2/8 [00:34<01:24, 14.09s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.9578
Sequential(
  (0): Linear(in_features=784, out_features=500, bias=True)
  (1): ReLU()
  (2): Linear(in_features=500, out_features=500, bias=True)
  (3): ReLU()
  (4): Linear(in_features=500, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.19847676574167158
Epoch 1 - Training loss: 0.09638402599499805
Epoch 2 - Training loss: 0.07516528856255479
Epoch 3 - Training loss: 0.058817851114898984
Epoch 4 - Training loss: 0.05135292863485211
Epoch 5 - Training loss: 0.04654478055160882
Epoch 6 - Training loss: 0.043606725453869746
Epoch 7 - Training loss: 0.038058100296045415
Epoch 8 - Training loss: 0.036237145121743496
Epoch 9 - Training loss: 0.033195589782237245
Epoch 10 - Training loss: 0.029474394100068857
Epoch 11 - Training loss: 0.030090300106147588
Epoch 12 - Training loss: 0.027208117412716976
Epoch 13 - Training loss: 0.024784008807528502
Epoch 14 - Training loss: 0.02184040832923038

Training Tim


 38%|███████████████████████████████▌                                                    | 3/8 [02:12<03:16, 39.22s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.977
Sequential(
  (0): Linear(in_features=784, out_features=500, bias=True)
  (1): ReLU()
  (2): Linear(in_features=500, out_features=500, bias=True)
  (3): ReLU()
  (4): Linear(in_features=500, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.28923599098695874
Epoch 1 - Training loss: 0.13297364035291648
Epoch 2 - Training loss: 0.09941499897324518
Epoch 3 - Training loss: 0.08018243460774993
Epoch 4 - Training loss: 0.07102221140324243
Epoch 5 - Training loss: 0.060364510891820065
Epoch 6 - Training loss: 0.05502810109573514
Epoch 7 - Training loss: 0.04793340583214723
Epoch 8 - Training loss: 0.0431024172633235
Epoch 9 - Training loss: 0.04027470605058679
Epoch 10 - Training loss: 0.03618503082245747
Epoch 11 - Training loss: 0.03482034825670249
Epoch 12 - Training loss: 0.033378561565489656
Epoch 13 - Training loss: 0.027912311851164138
Epoch 14 - Training loss: 0.02866316372171235

Training Time (in mi


 50%|██████████████████████████████████████████                                          | 4/8 [05:09<05:22, 80.58s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.9785
Sequential(
  (0): Linear(in_features=784, out_features=500, bias=True)
  (1): ReLU()
  (2): Linear(in_features=500, out_features=500, bias=True)
  (3): ReLU()
  (4): Linear(in_features=500, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.29298299311924336
Epoch 1 - Training loss: 0.13458236637733764
Epoch 2 - Training loss: 0.09981950031700673
Epoch 3 - Training loss: 0.081601103463931
Epoch 4 - Training loss: 0.06930771521898284
Epoch 5 - Training loss: 0.06134715763482648
Epoch 6 - Training loss: 0.05503137001612865
Epoch 7 - Training loss: 0.04576473468855055
Epoch 8 - Training loss: 0.041617919897426296
Epoch 9 - Training loss: 0.0403803883593029
Epoch 10 - Training loss: 0.0363200229060437
Epoch 11 - Training loss: 0.03652282420005689
Epoch 12 - Training loss: 0.0325230881661129
Epoch 13 - Training loss: 0.029806120041162093
Epoch 14 - Training loss: 0.02857122032333221

Training Time (in minute


 62%|███████████████████████████████████████████████████▉                               | 5/8 [08:06<05:27, 109.31s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.9763
Sequential(
  (0): Linear(in_features=784, out_features=500, bias=True)
  (1): ReLU()
  (2): Linear(in_features=500, out_features=500, bias=True)
  (3): ReLU()
  (4): Linear(in_features=500, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.2815579511940098
Epoch 1 - Training loss: 0.13604526199114475
Epoch 2 - Training loss: 0.09957630092155005
Epoch 3 - Training loss: 0.0818524497892779
Epoch 4 - Training loss: 0.07154065397168909
Epoch 5 - Training loss: 0.06232871782820997
Epoch 6 - Training loss: 0.052267128925349536
Epoch 7 - Training loss: 0.04785645633503032
Epoch 8 - Training loss: 0.04302720812115588
Epoch 9 - Training loss: 0.03830779195705584
Epoch 10 - Training loss: 0.03815786409944391
Epoch 11 - Training loss: 0.03469316827665482
Epoch 12 - Training loss: 0.029644338860869535
Epoch 13 - Training loss: 0.033096258439171286
Epoch 14 - Training loss: 0.027361878606953473

Training Time (in m


 75%|██████████████████████████████████████████████████████████████▎                    | 6/8 [11:02<04:18, 129.46s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.9808
Sequential(
  (0): Linear(in_features=784, out_features=500, bias=True)
  (1): ReLU()
  (2): Linear(in_features=500, out_features=500, bias=True)
  (3): ReLU()
  (4): Linear(in_features=500, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.29179105546269846
Epoch 1 - Training loss: 0.1335096589839662
Epoch 2 - Training loss: 0.10231842815891894
Epoch 3 - Training loss: 0.08513302725356525
Epoch 4 - Training loss: 0.06941453142881965
Epoch 5 - Training loss: 0.06312613720411876
Epoch 6 - Training loss: 0.052292865939311255
Epoch 7 - Training loss: 0.049987652801723875
Epoch 8 - Training loss: 0.04348054828483667
Epoch 9 - Training loss: 0.04101897856450951
Epoch 10 - Training loss: 0.03331213823553405
Epoch 11 - Training loss: 0.03532458109848662
Epoch 12 - Training loss: 0.033693754599729515
Epoch 13 - Training loss: 0.028801307687833747
Epoch 14 - Training loss: 0.03158824932354409

Training Time (in 


 88%|████████████████████████████████████████████████████████████████████████▋          | 7/8 [13:59<02:23, 143.64s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.9741
Sequential(
  (0): Linear(in_features=784, out_features=500, bias=True)
  (1): ReLU()
  (2): Linear(in_features=500, out_features=500, bias=True)
  (3): ReLU()
  (4): Linear(in_features=500, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.288760322878864
Epoch 1 - Training loss: 0.1317595845537145
Epoch 2 - Training loss: 0.09914682691555414
Epoch 3 - Training loss: 0.07872370730783702
Epoch 4 - Training loss: 0.07152156526846354
Epoch 5 - Training loss: 0.05773309044945024
Epoch 6 - Training loss: 0.053067452948429246
Epoch 7 - Training loss: 0.04726502064591659
Epoch 8 - Training loss: 0.04039657291080525
Epoch 9 - Training loss: 0.042154410240381385
Epoch 10 - Training loss: 0.035291034740203224
Epoch 11 - Training loss: 0.034385632565701756
Epoch 12 - Training loss: 0.030641883410620633
Epoch 13 - Training loss: 0.028888659200259743
Epoch 14 - Training loss: 0.03086476597184343

Training Time (in 


100%|███████████████████████████████████████████████████████████████████████████████████| 8/8 [16:55<00:00, 126.97s/it][A
 83%|██████████████████████████████████████████████████████████████████▋             | 5/6 [1:26:57<17:31, 1051.02s/it]
  0%|                                                                                            | 0/8 [00:00<?, ?it/s][A

Number Of Images Tested = 10000

Model Accuracy = 0.9752
Sequential(
  (0): Linear(in_features=784, out_features=1000, bias=True)
  (1): ReLU()
  (2): Linear(in_features=1000, out_features=1000, bias=True)
  (3): ReLU()
  (4): Linear(in_features=1000, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.018716375329601232
Epoch 1 - Training loss: 0.010303849159781612
Epoch 2 - Training loss: 0.006038703961667222
Epoch 3 - Training loss: 0.0058732079798733
Epoch 4 - Training loss: 0.005141867217478721
Epoch 5 - Training loss: 0.004311650387768044
Epoch 6 - Training loss: 0.0044671603198498805
Epoch 7 - Training loss: 0.004696197855447147
Epoch 8 - Training loss: 0.0047933603845425506
Epoch 9 - Training loss: 0.003862291542705951
Epoch 10 - Training loss: 0.004069915791945671
Epoch 11 - Training loss: 0.0046453471662901615
Epoch 12 - Training loss: 0.004383738043465848
Epoch 13 - Training loss: 0.0035770995212770474
Epoch 14 - Training loss: 0.0031117692843937415



 12%|██████████▌                                                                         | 1/8 [00:09<01:03,  9.11s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.9074
Sequential(
  (0): Linear(in_features=784, out_features=1000, bias=True)
  (1): ReLU()
  (2): Linear(in_features=1000, out_features=1000, bias=True)
  (3): ReLU()
  (4): Linear(in_features=1000, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.07113670254312852
Epoch 1 - Training loss: 0.03788445839114281
Epoch 2 - Training loss: 0.02968991190385717
Epoch 3 - Training loss: 0.02417553989871987
Epoch 4 - Training loss: 0.02299638971813452
Epoch 5 - Training loss: 0.021306779128370253
Epoch 6 - Training loss: 0.017942730853679593
Epoch 7 - Training loss: 0.019013661502012565
Epoch 8 - Training loss: 0.019274166898369027
Epoch 9 - Training loss: 0.01618220414052894
Epoch 10 - Training loss: 0.01567721453064413
Epoch 11 - Training loss: 0.01375249411060866
Epoch 12 - Training loss: 0.015196232657347406
Epoch 13 - Training loss: 0.01480685469549475
Epoch 14 - Training loss: 0.013337632823886393

Training Ti


 25%|█████████████████████                                                               | 2/8 [00:34<01:24, 14.02s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.95
Sequential(
  (0): Linear(in_features=784, out_features=1000, bias=True)
  (1): ReLU()
  (2): Linear(in_features=1000, out_features=1000, bias=True)
  (3): ReLU()
  (4): Linear(in_features=1000, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.18531269151598279
Epoch 1 - Training loss: 0.09115359502465231
Epoch 2 - Training loss: 0.07223797603043666
Epoch 3 - Training loss: 0.06243194977262381
Epoch 4 - Training loss: 0.053344107673827135
Epoch 5 - Training loss: 0.044773975830040634
Epoch 6 - Training loss: 0.04305140199517009
Epoch 7 - Training loss: 0.03857466883298113
Epoch 8 - Training loss: 0.03517065270702595
Epoch 9 - Training loss: 0.03400856011640479
Epoch 10 - Training loss: 0.03240680385794022
Epoch 11 - Training loss: 0.027909491562655868
Epoch 12 - Training loss: 0.028795280882012424
Epoch 13 - Training loss: 0.025969774430510458
Epoch 14 - Training loss: 0.0241897334771624

Training Time (


 38%|███████████████████████████████▌                                                    | 3/8 [02:13<03:16, 39.36s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.9766
Sequential(
  (0): Linear(in_features=784, out_features=1000, bias=True)
  (1): ReLU()
  (2): Linear(in_features=1000, out_features=1000, bias=True)
  (3): ReLU()
  (4): Linear(in_features=1000, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.2686821312379481
Epoch 1 - Training loss: 0.13266761696883547
Epoch 2 - Training loss: 0.10094763816141689
Epoch 3 - Training loss: 0.08298979017303697
Epoch 4 - Training loss: 0.07001469632598764
Epoch 5 - Training loss: 0.057559871364996504
Epoch 6 - Training loss: 0.052798830446348324
Epoch 7 - Training loss: 0.048617951754631517
Epoch 8 - Training loss: 0.04170325290667874
Epoch 9 - Training loss: 0.039496708563022585
Epoch 10 - Training loss: 0.04025026391159052
Epoch 11 - Training loss: 0.03426014728991112
Epoch 12 - Training loss: 0.03398650136045508
Epoch 13 - Training loss: 0.031111941088054543
Epoch 14 - Training loss: 0.029352066553394392

Training Tim


 50%|██████████████████████████████████████████                                          | 4/8 [05:11<05:24, 81.10s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.9772
Sequential(
  (0): Linear(in_features=784, out_features=1000, bias=True)
  (1): ReLU()
  (2): Linear(in_features=1000, out_features=1000, bias=True)
  (3): ReLU()
  (4): Linear(in_features=1000, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.27691721061526586
Epoch 1 - Training loss: 0.13038082198619144
Epoch 2 - Training loss: 0.09808728643365379
Epoch 3 - Training loss: 0.08292141656425081
Epoch 4 - Training loss: 0.06946601166026488
Epoch 5 - Training loss: 0.05753123667488284
Epoch 6 - Training loss: 0.05288934595446025
Epoch 7 - Training loss: 0.04768460758649973
Epoch 8 - Training loss: 0.04158945380052778
Epoch 9 - Training loss: 0.04061357841404008
Epoch 10 - Training loss: 0.0353918119655378
Epoch 11 - Training loss: 0.035094421631944525
Epoch 12 - Training loss: 0.032365814349782876
Epoch 13 - Training loss: 0.03175527837349853
Epoch 14 - Training loss: 0.02608920624459794

Training Time (i


 62%|███████████████████████████████████████████████████▉                               | 5/8 [08:09<05:30, 110.21s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.975
Sequential(
  (0): Linear(in_features=784, out_features=1000, bias=True)
  (1): ReLU()
  (2): Linear(in_features=1000, out_features=1000, bias=True)
  (3): ReLU()
  (4): Linear(in_features=1000, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.2733826006867929
Epoch 1 - Training loss: 0.1338205479188705
Epoch 2 - Training loss: 0.09780822474676282
Epoch 3 - Training loss: 0.07930538989206366
Epoch 4 - Training loss: 0.06957167068690952
Epoch 5 - Training loss: 0.05932666121253263
Epoch 6 - Training loss: 0.051079391532623246
Epoch 7 - Training loss: 0.047364730146655966
Epoch 8 - Training loss: 0.04366652489399542
Epoch 9 - Training loss: 0.03943063463392193
Epoch 10 - Training loss: 0.037723970340826966
Epoch 11 - Training loss: 0.03219681658319382
Epoch 12 - Training loss: 0.03708253048612539
Epoch 13 - Training loss: 0.027933533261738606
Epoch 14 - Training loss: 0.030921467814221185

Training Time (


 75%|██████████████████████████████████████████████████████████████▎                    | 6/8 [11:07<04:21, 130.63s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.9741
Sequential(
  (0): Linear(in_features=784, out_features=1000, bias=True)
  (1): ReLU()
  (2): Linear(in_features=1000, out_features=1000, bias=True)
  (3): ReLU()
  (4): Linear(in_features=1000, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.26834869831165015
Epoch 1 - Training loss: 0.1326597967226781
Epoch 2 - Training loss: 0.10080856077476287
Epoch 3 - Training loss: 0.08159206267883147
Epoch 4 - Training loss: 0.07131521683782022
Epoch 5 - Training loss: 0.05951930831140801
Epoch 6 - Training loss: 0.05193562586623023
Epoch 7 - Training loss: 0.0469320928121903
Epoch 8 - Training loss: 0.046024738757340894
Epoch 9 - Training loss: 0.03711375011516405
Epoch 10 - Training loss: 0.04029899022267905
Epoch 11 - Training loss: 0.03161937665285618
Epoch 12 - Training loss: 0.03323343321522162
Epoch 13 - Training loss: 0.028752484512743728
Epoch 14 - Training loss: 0.030245082738843045

Training Time (i


 88%|████████████████████████████████████████████████████████████████████████▋          | 7/8 [14:06<02:24, 144.87s/it][A

Number Of Images Tested = 10000

Model Accuracy = 0.9801
Sequential(
  (0): Linear(in_features=784, out_features=1000, bias=True)
  (1): ReLU()
  (2): Linear(in_features=1000, out_features=1000, bias=True)
  (3): ReLU()
  (4): Linear(in_features=1000, out_features=10, bias=True)
  (5): LogSoftmax()
)
Epoch 0 - Training loss: 0.2719365803640026
Epoch 1 - Training loss: 0.1321930060349802
Epoch 2 - Training loss: 0.10050707347969066
Epoch 3 - Training loss: 0.08361671009122817
Epoch 4 - Training loss: 0.06733155606000789
Epoch 5 - Training loss: 0.06215147711018891
Epoch 6 - Training loss: 0.05449876948488134
Epoch 7 - Training loss: 0.04576720457190453
Epoch 8 - Training loss: 0.04280438546194578
Epoch 9 - Training loss: 0.041142678215924995
Epoch 10 - Training loss: 0.03524564981246109
Epoch 11 - Training loss: 0.031479754978651873
Epoch 12 - Training loss: 0.031118317447634523
Epoch 13 - Training loss: 0.03580389538211927
Epoch 14 - Training loss: 0.023820798691691777

Training Time (


100%|███████████████████████████████████████████████████████████████████████████████████| 8/8 [17:04<00:00, 128.08s/it][A
100%|████████████████████████████████████████████████████████████████████████████████| 6/6 [1:44:01<00:00, 1040.33s/it]

Number Of Images Tested = 10000

Model Accuracy = 0.9792



